+++ /dev/null
-# The apt-p2p configuration file.
-#
-# This is an ini-type configuration file, using sections identified by
-# square brackets. Values are specified on a single line using the '='
-# sign. Some values indicate times, in which case a suffix of 'd' for
-# days, 'h' for hours, 'm' for minutes, and 's' for seconds can be used.
-# Some values can span multiple lines by starting the subsequent lines
-# with one or more spaces.
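-#
-# For example (illustrative values, not necessarily the defaults below):
-#   KEY_REFRESH = 57m
-#   BOOTSTRAP = www.example.org:9977
-#       peer.example.net:9976
-# The second BOOTSTRAP line continues the first because it starts with spaces.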
-#
-######################### DEFAULT ###################################
-# This is the default section containing the configuration options for the
-# main application.
-[DEFAULT]
-
-# The port number to listen on for requests.
-# The main application will use this TCP port to listen for requests from APT, and
-# for uploads to other peers. If a port is not specified for the DHT, it will also
-# use this UDP port to listen for DHT requests.
-PORT = 9977
-
-# Directory to store the downloaded files in
-CACHE_DIR = /var/cache/apt-p2p
-
-# Other directories containing packages to share with others
-# WARNING: all files in these directories will be hashed and available
-# for everybody to download
-# OTHER_DIRS =
-
-# Whether it's OK to use an IP address from a known local/private range
-LOCAL_OK = no
-
-# Unload the packages cache after an interval of inactivity this long.
-# The packages cache uses a lot of memory, and only takes a few seconds
-# to reload when a new request arrives.
-UNLOAD_PACKAGES_CACHE = 5m
-
-# Refresh the DHT keys after this much time has passed.
-# This should be a time slightly less than the DHT's KEY_EXPIRE value.
-KEY_REFRESH = 57m
-
-# Which DHT implementation to use.
-# It must be possible to do "from <DHT>.DHT import DHT" to get a class that
-# implements the IDHT interface. There should also be a similarly named
-# section below to specify the options for the DHT.
-DHT = apt_p2p_Khashmir
-
-# Whether to only run the DHT (for providing only a bootstrap node)
-DHT-ONLY = no
-
-####################### apt_p2p_Khashmir ############################
-# This is the default (included) DHT to use.
-[apt_p2p_Khashmir]
-
-# To specify a different (UDP) port for the DHT to use.
-# If not specified here, the PORT value in the DEFAULT section will be used.
-# PORT =
-
-# bootstrap nodes to contact to join the DHT
-BOOTSTRAP = www.camrdale.org:9977
- steveholt.hopto.org:9976
-
-# whether this node is a bootstrap node
-BOOTSTRAP_NODE = no
-
-# Kademlia "K" constant; this should be an even number
-K = 8
-
-# SHA1 is 160 bits long
-HASH_LENGTH = 160
-
-# interval between saving the running state
-CHECKPOINT_INTERVAL = 5m
-
-# number of concurrent calls to make per find node/value request
-CONCURRENT_REQS = 4
-
-# how many hosts to post values to
-STORE_REDUNDANCY = 3
-
-# How many values to attempt to retrieve from the DHT.
-# Setting this to 0 will try to get all values (which could take a while if
-# a lot of nodes have values). Setting it negative will try to get that
-# number of results from only the closest STORE_REDUNDANCY nodes to the hash.
-# The default is a large negative number so all values from the closest
-# STORE_REDUNDANCY nodes will be retrieved.
-RETRIEVE_VALUES = -10000
-
-# how many times in a row a node can fail to respond before it's booted from the routing table
-MAX_FAILURES = 3
-
-# never ping a node more often than this
-MIN_PING_INTERVAL = 15m
-
-# refresh buckets that haven't been touched in this long
-BUCKET_STALENESS = 1h
-
-# expire unrefreshed entries older than this
-KEY_EXPIRE = 1h
-
-# whether to spew info about the requests/responses in the protocol
-SPEW = no
+++ /dev/null
-#!/usr/bin/env python
-
-# Load apt-p2p application
-#
-# There are two ways apt-p2p can be started:
-# 1. twistd -y apt-p2p
-# - twistd will load this file and execute the app
-# in 'application' variable
-# 2. from command line
-# - __name__ will be '__main__'
-
-import pwd,sys
-
-from twisted.application import service, internet, app, strports
-from twisted.internet import reactor
-from twisted.python import usage, log
-from twisted.web2 import channel
-
-from apt_p2p.apt_p2p_conf import config, version, DEFAULT_CONFIG_FILES
-from apt_p2p.interfaces import IDHT
-
-config_file = ''
-
-if __name__ == '__main__':
- # Parse command line parameters when started on command line
- class AptP2POptions(usage.Options):
- optFlags = [
- ['help', 'h', 'Print this help message'],
- ]
- optParameters = [
- ['config-file', 'c', '', "Configuration file"],
- ['log-file', 'l', '-', "File to log to, - for stdout"],
- ]
- longdesc="apt-p2p is a peer-to-peer downloader for apt users"
- def opt_version(self):
- print "apt-p2p %s" % version.short()
- sys.exit(0)
-
- opts = AptP2POptions()
- try:
- opts.parseOptions()
- except usage.UsageError, ue:
- print '%s: %s' % (sys.argv[0], ue)
- sys.exit(1)
-
- config_file = opts.opts['config-file']
- log_file = opts.opts['log-file']
- if log_file == '-':
- f = sys.stdout
- else:
- f = open(log_file, 'w')
- log.startLogging(f, setStdout=1)
-
-log.msg("Loading config files: '%s'" % "', '".join(DEFAULT_CONFIG_FILES + [config_file]))
-config_read = config.read(DEFAULT_CONFIG_FILES + [config_file])
-log.msg("Successfully loaded config files: '%s'" % "', '".join(config_read))
-if config.has_option('DEFAULT', 'username') and config.get('DEFAULT', 'username'):
- uid,gid = pwd.getpwnam(config.get('DEFAULT', 'username'))[2:4]
-else:
- uid,gid = None,None
-
-log.msg('Starting application')
-application = service.Application("apt-p2p", uid, gid)
-#print service.IProcess(application).processName
-#service.IProcess(application).processName = 'apt-p2p'
-
-log.msg('Starting DHT')
-DHT = __import__(config.get('DEFAULT', 'DHT')+'.DHT', globals(), locals(), ['DHT'])
-assert IDHT.implementedBy(DHT.DHT), "You must provide a DHT implementation that implements the IDHT interface."
-myDHT = DHT.DHT()
-
-if not config.getboolean('DEFAULT', 'DHT-only'):
- log.msg('Starting main application server')
- from apt_p2p.apt_p2p import AptP2P
- myapp = AptP2P(myDHT)
- factory = myapp.getHTTPFactory()
- s = strports.service('tcp:'+config.get('DEFAULT', 'port'), factory)
- s.setServiceParent(application)
-else:
- myDHT.loadConfig(config, config.get('DEFAULT', 'DHT'))
- myDHT.join()
-
-if __name__ == '__main__':
- # Run on command line
- service.IServiceCollection(application).privilegedStartService()
- service.IServiceCollection(application).startService()
- reactor.run()
--- /dev/null
+# The apt-p2p configuration file.
+#
+# This is an ini-type configuration file, using sections identified by
+# square brackets. Values are specified on a single line using the '='
+# sign. Some values indicate times, in which case a suffix of 'd' for
+# days, 'h' for hours, 'm' for minutes, and 's' for seconds can be used.
+# Some values can span multiple lines by starting the subsequent lines
+# with one or more spaces.
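+#
+# For example (illustrative values, not necessarily the defaults below):
+#   KEY_REFRESH = 57m
+#   BOOTSTRAP = www.example.org:9977
+#       peer.example.net:9976
+# The second BOOTSTRAP line continues the first because it starts with spaces.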
+#
+######################### DEFAULT ###################################
+# This is the default section containing the configuration options for the
+# main application.
+[DEFAULT]
+
+# The port number to listen on for requests.
+# The main application will use this TCP port to listen for requests from APT, and
+# for uploads to other peers. If a port is not specified for the DHT, it will also
+# use this UDP port to listen for DHT requests.
+PORT = 9977
+
+# Directory to store the downloaded files in
+CACHE_DIR = /var/cache/apt-p2p
+
+# Other directories containing packages to share with others
+# WARNING: all files in these directories will be hashed and available
+# for everybody to download
+# OTHER_DIRS =
+
+# Whether it's OK to use an IP address from a known local/private range
+LOCAL_OK = no
+
+# Unload the packages cache after an interval of inactivity this long.
+# The packages cache uses a lot of memory, and only takes a few seconds
+# to reload when a new request arrives.
+UNLOAD_PACKAGES_CACHE = 5m
+
+# Refresh the DHT keys after this much time has passed.
+# This should be a time slightly less than the DHT's KEY_EXPIRE value.
+KEY_REFRESH = 57m
+
+# Which DHT implementation to use.
+# It must be possible to do "from <DHT>.DHT import DHT" to get a class that
+# implements the IDHT interface. There should also be a similarly named
+# section below to specify the options for the DHT.
+DHT = apt_p2p_Khashmir
+
+# Whether to only run the DHT (for providing only a bootstrap node)
+DHT-ONLY = no
+
+####################### apt_p2p_Khashmir ############################
+# This is the default (included) DHT to use.
+[apt_p2p_Khashmir]
+
+# To specify a different (UDP) port for the DHT to use.
+# If not specified here, the PORT value in the DEFAULT section will be used.
+# PORT =
+
+# bootstrap nodes to contact to join the DHT
+BOOTSTRAP = www.camrdale.org:9977
+ steveholt.hopto.org:9976
+
+# whether this node is a bootstrap node
+BOOTSTRAP_NODE = no
+
+# Kademlia "K" constant; this should be an even number
+K = 8
+
+# SHA1 is 160 bits long
+HASH_LENGTH = 160
+
+# interval between saving the running state
+CHECKPOINT_INTERVAL = 5m
+
+# number of concurrent calls to make per find node/value request
+CONCURRENT_REQS = 4
+
+# how many hosts to post values to
+STORE_REDUNDANCY = 3
+
+# How many values to attempt to retrieve from the DHT.
+# Setting this to 0 will try to get all values (which could take a while if
+# a lot of nodes have values). Setting it negative will try to get that
+# number of results from only the closest STORE_REDUNDANCY nodes to the hash.
+# The default is a large negative number so all values from the closest
+# STORE_REDUNDANCY nodes will be retrieved.
+RETRIEVE_VALUES = -10000
+
+# how many times in a row a node can fail to respond before it's booted from the routing table
+MAX_FAILURES = 3
+
+# never ping a node more often than this
+MIN_PING_INTERVAL = 15m
+
+# refresh buckets that haven't been touched in this long
+BUCKET_STALENESS = 1h
+
+# expire unrefreshed entries older than this
+KEY_EXPIRE = 1h
+
+# whether to spew info about the requests/responses in the protocol
+SPEW = no
--- /dev/null
+#!/usr/bin/env python
+
+# Load apt-p2p application
+#
+# There are two ways apt-p2p can be started:
+# 1. twistd -y apt-p2p
+# - twistd will load this file and execute the app
+# in 'application' variable
+# 2. from command line
+# - __name__ will be '__main__'
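+#
+# For example (the paths here are illustrative assumptions, not from this file):
+#   twistd -y /usr/sbin/apt-p2p
+#   /usr/sbin/apt-p2p -c /etc/apt-p2p/apt-p2p.conf -l -
+# (-c and -l are the config-file and log-file options parsed below)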
+
+import pwd,sys
+
+from twisted.application import service, internet, app, strports
+from twisted.internet import reactor
+from twisted.python import usage, log
+from twisted.web2 import channel
+
+from apt_p2p.apt_p2p_conf import config, version, DEFAULT_CONFIG_FILES
+from apt_p2p.interfaces import IDHT
+
+config_file = ''
+
+if __name__ == '__main__':
+ # Parse command line parameters when started on command line
+ class AptP2POptions(usage.Options):
+ optFlags = [
+ ['help', 'h', 'Print this help message'],
+ ]
+ optParameters = [
+ ['config-file', 'c', '', "Configuration file"],
+ ['log-file', 'l', '-', "File to log to, - for stdout"],
+ ]
+ longdesc="apt-p2p is a peer-to-peer downloader for apt users"
+ def opt_version(self):
+ print "apt-p2p %s" % version.short()
+ sys.exit(0)
+
+ opts = AptP2POptions()
+ try:
+ opts.parseOptions()
+ except usage.UsageError, ue:
+ print '%s: %s' % (sys.argv[0], ue)
+ sys.exit(1)
+
+ config_file = opts.opts['config-file']
+ log_file = opts.opts['log-file']
+ if log_file == '-':
+ f = sys.stdout
+ else:
+ f = open(log_file, 'w')
+ log.startLogging(f, setStdout=1)
+
+log.msg("Loading config files: '%s'" % "', '".join(DEFAULT_CONFIG_FILES + [config_file]))
+config_read = config.read(DEFAULT_CONFIG_FILES + [config_file])
+log.msg("Successfully loaded config files: '%s'" % "', '".join(config_read))
+if config.has_option('DEFAULT', 'username') and config.get('DEFAULT', 'username'):
+ uid,gid = pwd.getpwnam(config.get('DEFAULT', 'username'))[2:4]
+else:
+ uid,gid = None,None
+
+log.msg('Starting application')
+application = service.Application("apt-p2p", uid, gid)
+#print service.IProcess(application).processName
+#service.IProcess(application).processName = 'apt-p2p'
+
+log.msg('Starting DHT')
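+# With the default config (DHT = apt_p2p_Khashmir) the __import__ below is
+# equivalent to "from apt_p2p_Khashmir.DHT import DHT".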
+DHT = __import__(config.get('DEFAULT', 'DHT')+'.DHT', globals(), locals(), ['DHT'])
+assert IDHT.implementedBy(DHT.DHT), "You must provide a DHT implementation that implements the IDHT interface."
+myDHT = DHT.DHT()
+
+if not config.getboolean('DEFAULT', 'DHT-only'):
+ log.msg('Starting main application server')
+ from apt_p2p.apt_p2p import AptP2P
+ myapp = AptP2P(myDHT)
+ factory = myapp.getHTTPFactory()
+ s = strports.service('tcp:'+config.get('DEFAULT', 'port'), factory)
+ s.setServiceParent(application)
+else:
+ myDHT.loadConfig(config, config.get('DEFAULT', 'DHT'))
+ myDHT.join()
+
+if __name__ == '__main__':
+ # Run on command line
+ service.IServiceCollection(application).privilegedStartService()
+ service.IServiceCollection(application).startService()
+ reactor.run()
+++ /dev/null
-#
-# Copyright (C) 2002 Manuel Estrada Sainz <ranty@debian.org>
-# Copyright (C) 2008 Cameron Dale <camrdale@gmail.com>
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of version 2.1 of the GNU General Public
-# License as published by the Free Software Foundation.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-"""Manage a mirror's index files.
-
-@type TRACKED_FILES: C{list} of C{string}
-@var TRACKED_FILES: the file names of files that contain index information
-"""
-
-# Disable the FutureWarning from the apt module
-import warnings
-warnings.simplefilter("ignore", FutureWarning)
-
-import os, shelve
-from random import choice
-from shutil import rmtree
-from copy import deepcopy
-from UserDict import DictMixin
-
-from twisted.internet import threads, defer, reactor
-from twisted.python import log
-from twisted.python.filepath import FilePath
-from twisted.trial import unittest
-
-import apt_pkg, apt_inst
-from apt import OpProgress
-from debian_bundle import deb822
-
-from Hash import HashObject
-
-apt_pkg.init()
-
-TRACKED_FILES = ['release', 'sources', 'packages']
-
-class PackageFileList(DictMixin):
- """Manages a list of index files belonging to a mirror.
-
- @type cache_dir: L{twisted.python.filepath.FilePath}
- @ivar cache_dir: the directory to use for storing all files
- @type packages: C{shelve dictionary}
- @ivar packages: the files tracked for this mirror
- """
-
- def __init__(self, cache_dir):
- """Initialize the list by opening the dictionary."""
- self.cache_dir = cache_dir
- self.cache_dir.restat(False)
- if not self.cache_dir.exists():
- self.cache_dir.makedirs()
- self.packages = None
- self.open()
-
- def open(self):
- """Open the persistent dictionary of files for this mirror."""
- if self.packages is None:
- self.packages = shelve.open(self.cache_dir.child('packages.db').path)
-
- def close(self):
- """Close the persistent dictionary."""
- if self.packages is not None:
- self.packages.close()
-
- def update_file(self, cache_path, file_path):
- """Check if an updated file needs to be tracked.
-
- Called from the mirror manager when files get updated so we can update our
- fake lists and sources.list.
-
- @type cache_path: C{string}
- @param cache_path: the location of the file within the mirror
- @type file_path: L{twisted.python.filepath.FilePath}
- @param file_path: The location of the file in the file system
- @rtype: C{boolean}
- @return: whether the file is an index file
- """
- filename = cache_path.split('/')[-1]
- if filename.lower() in TRACKED_FILES:
- log.msg("Registering package file: "+cache_path)
- self.packages[cache_path] = file_path
- return True
- return False
-
- def check_files(self):
- """Check all files in the database to remove any that don't exist."""
- files = self.packages.keys()
- for f in files:
- self.packages[f].restat(False)
- if not self.packages[f].exists():
- log.msg("File in packages database has been deleted: "+f)
- del self.packages[f]
-
- #{ Dictionary interface details
- def __getitem__(self, key): return self.packages[key]
- def __setitem__(self, key, item): self.packages[key] = item
- def __delitem__(self, key): del self.packages[key]
- def keys(self): return self.packages.keys()
-
-class AptPackages:
- """Answers queries about packages available from a mirror.
-
- Uses the python-apt tools to parse and provide information about the
- files that are available on a single mirror.
-
- @ivar DEFAULT_APT_CONFIG: the default configuration parameters to use for apt
- @ivar essential_dirs: directories that must be created for apt to work
- @ivar essential_files: files that must be created for apt to work
- @type cache_dir: L{twisted.python.filepath.FilePath}
- @ivar cache_dir: the directory to use for storing all files
- @type unload_delay: C{int}
- @ivar unload_delay: the time to wait before unloading the apt cache
- @ivar apt_config: the configuration parameters to use for apt
- @type packages: L{PackageFileList}
- @ivar packages: the persistent storage of tracked apt index files
- @type loaded: C{boolean}
- @ivar loaded: whether the apt cache is currently loaded
- @type loading: L{twisted.internet.defer.Deferred}
- @ivar loading: if the cache is currently being loaded, this will be
- called when it is loaded, otherwise it is None
- @type unload_later: L{twisted.internet.interfaces.IDelayedCall}
- @ivar unload_later: the delayed call to unload the apt cache
- @type indexrecords: C{dictionary}
- @ivar indexrecords: the hashes of index files for the mirror, keys are
- mirror directories, values are dictionaries with keys the path to the
- index file in the mirror directory and values are dictionaries with
- keys the hash type and values the hash
- @type cache: C{apt_pkg.GetCache()}
- @ivar cache: the apt cache of the mirror
- @type records: C{apt_pkg.GetPkgRecords()}
- @ivar records: the apt package records for all binary packages in a mirror
- @type srcrecords: C{apt_pkg.GetPkgSrcRecords}
- @ivar srcrecords: the apt package records for all source packages in a mirror
- """
-
- DEFAULT_APT_CONFIG = {
- #'APT' : '',
- #'APT::Architecture' : 'i386', # Commented so the machine's config will set this
- #'APT::Default-Release' : 'unstable',
- 'Dir':'.', # /
- 'Dir::State' : 'apt/', # var/lib/apt/
- 'Dir::State::Lists': 'lists/', # lists/
- #'Dir::State::cdroms' : 'cdroms.list',
- 'Dir::State::userstatus' : 'status.user',
- 'Dir::State::status': 'dpkg/status', # '/var/lib/dpkg/status'
- 'Dir::Cache' : '.apt/cache/', # var/cache/apt/
- #'Dir::Cache::archives' : 'archives/',
- 'Dir::Cache::srcpkgcache' : 'srcpkgcache.bin',
- 'Dir::Cache::pkgcache' : 'pkgcache.bin',
- 'Dir::Etc' : 'apt/etc/', # etc/apt/
- 'Dir::Etc::sourcelist' : 'sources.list',
- 'Dir::Etc::vendorlist' : 'vendors.list',
- 'Dir::Etc::vendorparts' : 'vendors.list.d',
- #'Dir::Etc::main' : 'apt.conf',
- #'Dir::Etc::parts' : 'apt.conf.d',
- #'Dir::Etc::preferences' : 'preferences',
- 'Dir::Bin' : '',
- #'Dir::Bin::methods' : '', #'/usr/lib/apt/methods'
- 'Dir::Bin::dpkg' : '/usr/bin/dpkg',
- #'DPkg' : '',
- #'DPkg::Pre-Install-Pkgs' : '',
- #'DPkg::Tools' : '',
- #'DPkg::Tools::Options' : '',
- #'DPkg::Tools::Options::/usr/bin/apt-listchanges' : '',
- #'DPkg::Tools::Options::/usr/bin/apt-listchanges::Version' : '2',
- #'DPkg::Post-Invoke' : '',
- }
- essential_dirs = ('apt', 'apt/cache', 'apt/dpkg', 'apt/etc', 'apt/lists',
- 'apt/lists/partial')
- essential_files = ('apt/dpkg/status', 'apt/etc/sources.list',)
-
- def __init__(self, cache_dir, unload_delay):
- """Construct a new packages manager.
-
-        @param cache_dir: directory to use to store files for this mirror
-        @param unload_delay: the time to wait before unloading the apt cache
-        """
- self.cache_dir = cache_dir
- self.unload_delay = unload_delay
- self.apt_config = deepcopy(self.DEFAULT_APT_CONFIG)
-
- # Create the necessary files and directories for apt
- for dir in self.essential_dirs:
- path = self.cache_dir.preauthChild(dir)
- if not path.exists():
- path.makedirs()
- for file in self.essential_files:
- path = self.cache_dir.preauthChild(file)
- if not path.exists():
- path.touch()
-
- self.apt_config['Dir'] = self.cache_dir.path
- self.apt_config['Dir::State::status'] = self.cache_dir.preauthChild(self.apt_config['Dir::State']).preauthChild(self.apt_config['Dir::State::status']).path
- self.packages = PackageFileList(cache_dir)
- self.loaded = False
- self.loading = None
- self.unload_later = None
-
- def __del__(self):
- self.cleanup()
-
- def addRelease(self, cache_path, file_path):
- """Add a Release file's info to the list of index files.
-
- Dirty hack until python-apt supports apt-pkg/indexrecords.h
- (see Bug #456141)
- """
- self.indexrecords[cache_path] = {}
-
- read_packages = False
- f = file_path.open('r')
-
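-        # An illustrative Release stanza (made up, not from the source):
-        #   SHA1:
-        #    66a2e5cbd66f54... 2038536 main/binary-i386/Packages
-        # would be stored, keyed by file name and upper-cased hash type, as:
-        #   indexrecords[cache_path]['main/binary-i386/Packages']['SHA1']
-        #     = (hash, size)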
- # Use python-debian routines to parse the file for hashes
- rel = deb822.Release(f, fields = ['MD5Sum', 'SHA1', 'SHA256'])
- for hash_type in rel:
- for file in rel[hash_type]:
- self.indexrecords[cache_path].setdefault(file['name'], {})[hash_type.upper()] = (file[hash_type], file['size'])
-
- f.close()
-
- def file_updated(self, cache_path, file_path):
- """A file in the mirror has changed or been added.
-
- If this affects us, unload our apt database.
- @see: L{PackageFileList.update_file}
- """
- if self.packages.update_file(cache_path, file_path):
- self.unload()
-
- def load(self):
- """Make sure the package cache is initialized and loaded."""
- # Reset the pending unload call
- if self.unload_later and self.unload_later.active():
- self.unload_later.reset(self.unload_delay)
- else:
- self.unload_later = reactor.callLater(self.unload_delay, self.unload)
-
- # Make sure it's not already being loaded
- if self.loading is None:
- log.msg('Loading the packages cache')
- self.loading = threads.deferToThread(self._load)
- self.loading.addCallback(self.doneLoading)
- return self.loading
-
- def doneLoading(self, loadResult):
- """Cache is loaded."""
- self.loading = None
- # Must pass on the result for the next callback
- return loadResult
-
- def _load(self):
- """Regenerates the fake configuration and loads the packages caches."""
- if self.loaded: return True
-
- # Modify the default configuration to create the fake one.
- apt_pkg.InitSystem()
- self.cache_dir.preauthChild(self.apt_config['Dir::State']
- ).preauthChild(self.apt_config['Dir::State::Lists']).remove()
- self.cache_dir.preauthChild(self.apt_config['Dir::State']
- ).preauthChild(self.apt_config['Dir::State::Lists']
- ).child('partial').makedirs()
- sources_file = self.cache_dir.preauthChild(self.apt_config['Dir::Etc']
- ).preauthChild(self.apt_config['Dir::Etc::sourcelist'])
- sources = sources_file.open('w')
- sources_count = 0
- deb_src_added = False
- self.packages.check_files()
- self.indexrecords = {}
-
- # Create an entry in sources.list for each needed index file
- for f in self.packages:
- # we should probably clear old entries from self.packages and
- # take into account the recorded mtime as optimization
- file = self.packages[f]
- if f.split('/')[-1] == "Release":
- self.addRelease(f, file)
- fake_uri='http://apt-p2p'+f
- fake_dirname = '/'.join(fake_uri.split('/')[:-1])
- if f.endswith('Sources'):
- deb_src_added = True
- source_line='deb-src '+fake_dirname+'/ /'
- else:
- source_line='deb '+fake_dirname+'/ /'
- listpath = self.cache_dir.preauthChild(self.apt_config['Dir::State']
- ).preauthChild(self.apt_config['Dir::State::Lists']
- ).child(apt_pkg.URItoFileName(fake_uri))
- sources.write(source_line+'\n')
- log.msg("Sources line: " + source_line)
- sources_count = sources_count + 1
-
- if listpath.exists():
- #we should empty the directory instead
- listpath.remove()
- os.symlink(file.path, listpath.path)
- sources.close()
-
- if sources_count == 0:
- log.msg("No Packages files available for %s backend"%(self.cache_dir.path))
- return False
-
- log.msg("Loading Packages database for "+self.cache_dir.path)
- for key, value in self.apt_config.items():
- apt_pkg.Config[key] = value
-
- self.cache = apt_pkg.GetCache(OpProgress())
- self.records = apt_pkg.GetPkgRecords(self.cache)
- if deb_src_added:
- self.srcrecords = apt_pkg.GetPkgSrcRecords()
- else:
- self.srcrecords = None
-
- self.loaded = True
- return True
-
- def unload(self):
- """Tries to make the packages server quit."""
- if self.unload_later and self.unload_later.active():
- self.unload_later.cancel()
- self.unload_later = None
- if self.loaded:
- log.msg('Unloading the packages cache')
- # This should save memory
- del self.cache
- del self.records
- del self.srcrecords
- del self.indexrecords
- self.loaded = False
-
- def cleanup(self):
- """Cleanup and close any loaded caches."""
- self.unload()
- if self.unload_later and self.unload_later.active():
- self.unload_later.cancel()
- self.packages.close()
-
- def findHash(self, path):
- """Find the hash for a given path in this mirror.
-
- @type path: C{string}
- @param path: the path within the mirror of the file to lookup
- @rtype: L{twisted.internet.defer.Deferred}
- @return: a deferred so it can make sure the cache is loaded first
- """
- d = defer.Deferred()
-
- deferLoad = self.load()
- deferLoad.addCallback(self._findHash, path, d)
- deferLoad.addErrback(self._findHash_error, path, d)
-
- return d
-
- def _findHash_error(self, failure, path, d):
- """An error occurred, return an empty hash."""
- log.msg('An error occurred while looking up a hash for: %s' % path)
- log.err(failure)
- d.callback(HashObject())
- return failure
-
- def _findHash(self, loadResult, path, d):
- """Search the records for the hash of a path.
-
- @type loadResult: C{boolean}
- @param loadResult: whether apt's cache was successfully loaded
- @type path: C{string}
- @param path: the path within the mirror of the file to lookup
- @type d: L{twisted.internet.defer.Deferred}
- @param d: the deferred to callback with the result
- """
- if not loadResult:
- d.callback(HashObject())
- return loadResult
-
- h = HashObject()
-
- # First look for the path in the cache of index files
- for release in self.indexrecords:
- if path.startswith(release[:-7]):
- for indexFile in self.indexrecords[release]:
- if release[:-7] + indexFile == path:
- h.setFromIndexRecord(self.indexrecords[release][indexFile])
- d.callback(h)
- return loadResult
-
- package = path.split('/')[-1].split('_')[0]
-
- # Check the binary packages
- try:
- for version in self.cache[package].VersionList:
- size = version.Size
- for verFile in version.FileList:
- if self.records.Lookup(verFile):
- if '/' + self.records.FileName == path:
- h.setFromPkgRecord(self.records, size)
- d.callback(h)
- return loadResult
- except KeyError:
- pass
-
- # Check the source packages' files
- if self.srcrecords:
- self.srcrecords.Restart()
- if self.srcrecords.Lookup(package):
- for f in self.srcrecords.Files:
- if path == '/' + f[2]:
- h.setFromSrcRecord(f)
- d.callback(h)
- return loadResult
-
- d.callback(h)
-
- # Have to pass the returned loadResult on in case other calls to this function are pending.
- return loadResult
-
-class TestAptPackages(unittest.TestCase):
- """Unit tests for the AptPackages cache."""
-
- pending_calls = []
- client = None
- timeout = 10
- packagesFile = ''
- sourcesFile = ''
- releaseFile = ''
-
- def setUp(self):
- """Initializes the cache with files found in the traditional apt location."""
- self.client = AptPackages(FilePath('/tmp/.apt-p2p'), 300)
-
- # Find the largest index files that are for 'main'
- self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n')
- self.sourcesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').read().rstrip('\n')
-
- # Find the Release file corresponding to the found Packages file
- for f in os.walk('/var/lib/apt/lists').next()[2]:
- if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
- self.releaseFile = f
- break
-
- # Add all the found files to the PackageFileList
- self.client.file_updated(self.releaseFile[self.releaseFile.find('_dists_'):].replace('_','/'),
- FilePath('/var/lib/apt/lists/' + self.releaseFile))
- self.client.file_updated(self.packagesFile[self.packagesFile.find('_dists_'):].replace('_','/'),
- FilePath('/var/lib/apt/lists/' + self.packagesFile))
- self.client.file_updated(self.sourcesFile[self.sourcesFile.find('_dists_'):].replace('_','/'),
- FilePath('/var/lib/apt/lists/' + self.sourcesFile))
-
- def test_pkg_hash(self):
- """Tests loading the binary package records cache."""
- self.client._load()
-
- self.client.records.Lookup(self.client.cache['dpkg'].VersionList[0].FileList[0])
-
- pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
- '/var/lib/apt/lists/' + self.packagesFile +
- ' | grep -E "^SHA1:" | head -n 1' +
- ' | cut -d\ -f 2').read().rstrip('\n')
-
- self.failUnless(self.client.records.SHA1Hash == pkg_hash,
- "Hashes don't match: %s != %s" % (self.client.records.SHA1Hash, pkg_hash))
-
- def test_src_hash(self):
- """Tests loading the source package records cache."""
- self.client._load()
-
- self.client.srcrecords.Lookup('dpkg')
-
- src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
- '/var/lib/apt/lists/' + self.sourcesFile +
- ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
- ' | cut -d\ -f 2').read().split('\n')[:-1]
-
- for f in self.client.srcrecords.Files:
- self.failUnless(f[0] in src_hashes, "Couldn't find %s in: %r" % (f[0], src_hashes))
-
- def test_index_hash(self):
- """Tests loading the cache of index file information."""
- self.client._load()
-
- indexhash = self.client.indexrecords[self.releaseFile[self.releaseFile.find('_dists_'):].replace('_','/')]['main/binary-i386/Packages.bz2']['SHA1'][0]
-
- idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
- '/var/lib/apt/lists/' + self.releaseFile +
- ' | grep -E " main/binary-i386/Packages.bz2$"'
- ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
-
- self.failUnless(indexhash == idx_hash, "Hashes don't match: %s != %s" % (indexhash, idx_hash))
-
- def verifyHash(self, found_hash, path, true_hash):
- self.failUnless(found_hash.hexexpected() == true_hash,
- "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))
-
- def test_findIndexHash(self):
- """Tests finding the hash of a single index file."""
- lastDefer = defer.Deferred()
-
- idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
- '/var/lib/apt/lists/' + self.releaseFile +
- ' | grep -E " main/binary-i386/Packages.bz2$"'
- ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
- idx_path = '/' + self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2'
-
- d = self.client.findHash(idx_path)
- d.addCallback(self.verifyHash, idx_path, idx_hash)
-
- d.addBoth(lastDefer.callback)
- return lastDefer
-
- def test_findPkgHash(self):
- """Tests finding the hash of a single binary package."""
- lastDefer = defer.Deferred()
-
- pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
- '/var/lib/apt/lists/' + self.packagesFile +
- ' | grep -E "^SHA1:" | head -n 1' +
- ' | cut -d\ -f 2').read().rstrip('\n')
- pkg_path = '/' + os.popen('grep -A 30 -E "^Package: dpkg$" ' +
- '/var/lib/apt/lists/' + self.packagesFile +
- ' | grep -E "^Filename:" | head -n 1' +
- ' | cut -d\ -f 2').read().rstrip('\n')
-
- d = self.client.findHash(pkg_path)
- d.addCallback(self.verifyHash, pkg_path, pkg_hash)
-
- d.addBoth(lastDefer.callback)
- return lastDefer
-
- def test_findSrcHash(self):
- """Tests finding the hash of a single source package."""
- lastDefer = defer.Deferred()
-
- src_dir = '/' + os.popen('grep -A 30 -E "^Package: dpkg$" ' +
- '/var/lib/apt/lists/' + self.sourcesFile +
- ' | grep -E "^Directory:" | head -n 1' +
- ' | cut -d\ -f 2').read().rstrip('\n')
- src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
- '/var/lib/apt/lists/' + self.sourcesFile +
- ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
- ' | cut -d\ -f 2').read().split('\n')[:-1]
- src_paths = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
- '/var/lib/apt/lists/' + self.sourcesFile +
- ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
- ' | cut -d\ -f 4').read().split('\n')[:-1]
-
- i = choice(range(len(src_hashes)))
- d = self.client.findHash(src_dir + '/' + src_paths[i])
- d.addCallback(self.verifyHash, src_dir + '/' + src_paths[i], src_hashes[i])
-
- d.addBoth(lastDefer.callback)
- return lastDefer
-
- def test_multipleFindHash(self):
- """Tests finding the hash of an index file, binary package, source package, and another index file."""
- lastDefer = defer.Deferred()
-
- # Lookup a Packages.bz2 file
- idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
- '/var/lib/apt/lists/' + self.releaseFile +
- ' | grep -E " main/binary-i386/Packages.bz2$"'
- ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
- idx_path = '/' + self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2'
-
- d = self.client.findHash(idx_path)
- d.addCallback(self.verifyHash, idx_path, idx_hash)
-
- # Lookup the binary 'dpkg' package
- pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
- '/var/lib/apt/lists/' + self.packagesFile +
- ' | grep -E "^SHA1:" | head -n 1' +
- ' | cut -d\ -f 2').read().rstrip('\n')
- pkg_path = '/' + os.popen('grep -A 30 -E "^Package: dpkg$" ' +
- '/var/lib/apt/lists/' + self.packagesFile +
- ' | grep -E "^Filename:" | head -n 1' +
- ' | cut -d\ -f 2').read().rstrip('\n')
-
- d = self.client.findHash(pkg_path)
- d.addCallback(self.verifyHash, pkg_path, pkg_hash)
-
- # Lookup the source 'dpkg' package
- src_dir = '/' + os.popen('grep -A 30 -E "^Package: dpkg$" ' +
- '/var/lib/apt/lists/' + self.sourcesFile +
- ' | grep -E "^Directory:" | head -n 1' +
- ' | cut -d\ -f 2').read().rstrip('\n')
- src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
- '/var/lib/apt/lists/' + self.sourcesFile +
- ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
- ' | cut -d\ -f 2').read().split('\n')[:-1]
- src_paths = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
- '/var/lib/apt/lists/' + self.sourcesFile +
- ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
- ' | cut -d\ -f 4').read().split('\n')[:-1]
-
- for i in range(len(src_hashes)):
- d = self.client.findHash(src_dir + '/' + src_paths[i])
- d.addCallback(self.verifyHash, src_dir + '/' + src_paths[i], src_hashes[i])
-
- # Lookup a Sources.bz2 file
- idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
- '/var/lib/apt/lists/' + self.releaseFile +
- ' | grep -E " main/source/Sources.bz2$"'
- ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
- idx_path = '/' + self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')[:-7] + 'main/source/Sources.bz2'
-
- d = self.client.findHash(idx_path)
- d.addCallback(self.verifyHash, idx_path, idx_hash)
-
- d.addBoth(lastDefer.callback)
- return lastDefer
-
- def tearDown(self):
- for p in self.pending_calls:
- if p.active():
- p.cancel()
- self.pending_calls = []
- self.client.cleanup()
- self.client = None
+++ /dev/null
-
-"""Manage a cache of downloaded files.
-
-@var DECOMPRESS_EXTS: a list of file extensions that need to be decompressed
-@var DECOMPRESS_FILES: a list of file names that need to be decompressed
-"""
-
-from bz2 import BZ2Decompressor
-from zlib import decompressobj, MAX_WBITS
-from gzip import FCOMMENT, FEXTRA, FHCRC, FNAME, FTEXT
-from urlparse import urlparse
-import os
-
-from twisted.python import log
-from twisted.python.filepath import FilePath
-from twisted.internet import defer, reactor
-from twisted.trial import unittest
-from twisted.web2 import stream
-from twisted.web2.http import splitHostPort
-
-from Hash import HashObject
-
-DECOMPRESS_EXTS = ['.gz', '.bz2']
-DECOMPRESS_FILES = ['release', 'sources', 'packages']
-
-class ProxyFileStream(stream.SimpleStream):
- """Saves a stream to a file while providing a new stream.
-
- Also optionally decompresses the file while it is being downloaded.
-
- @type stream: L{twisted.web2.stream.IByteStream}
- @ivar stream: the input stream being read
- @type outFile: L{twisted.python.filepath.FilePath}
- @ivar outFile: the file being written
- @type hash: L{Hash.HashObject}
- @ivar hash: the hash object for the file
- @type gzfile: C{file}
- @ivar gzfile: the open file to write decompressed gzip data to
- @type gzdec: L{zlib.decompressobj}
- @ivar gzdec: the decompressor to use for the compressed gzip data
- @type gzheader: C{boolean}
- @ivar gzheader: whether the gzip header still needs to be removed from
- the zlib compressed data
- @type bz2file: C{file}
- @ivar bz2file: the open file to write decompressed bz2 data to
- @type bz2dec: L{bz2.BZ2Decompressor}
- @ivar bz2dec: the decompressor to use for the compressed bz2 data
- @type length: C{int}
- @ivar length: the length of the original (compressed) file
- @type doneDefer: L{twisted.internet.defer.Deferred}
- @ivar doneDefer: the deferred that will fire when done streaming
-
- @group Stream implementation: read, close
-
- """
-
- def __init__(self, stream, outFile, hash, decompress = None, decFile = None):
- """Initializes the proxy.
-
- @type stream: L{twisted.web2.stream.IByteStream}
- @param stream: the input stream to read from
- @type outFile: L{twisted.python.filepath.FilePath}
- @param outFile: the file to write to
- @type hash: L{Hash.HashObject}
- @param hash: the hash object to use for the file
- @type decompress: C{string}
- @param decompress: also decompress the file as this type
- (currently only '.gz' and '.bz2' are supported)
- @type decFile: C{twisted.python.FilePath}
- @param decFile: the file to write the decompressed data to
- """
- self.stream = stream
- self.outFile = outFile.open('w')
- self.hash = hash
- self.hash.new()
- self.gzfile = None
- self.bz2file = None
- if decompress == ".gz":
- self.gzheader = True
- self.gzfile = decFile.open('w')
- self.gzdec = decompressobj(-MAX_WBITS)
- elif decompress == ".bz2":
- self.bz2file = decFile.open('w')
- self.bz2dec = BZ2Decompressor()
- self.length = self.stream.length
- self.doneDefer = defer.Deferred()
-
- def _done(self):
- """Close all the output files, return the result."""
- if not self.outFile.closed:
- self.outFile.close()
- self.hash.digest()
- if self.gzfile:
- # Finish the decompression
- data_dec = self.gzdec.flush()
- self.gzfile.write(data_dec)
- self.gzfile.close()
- self.gzfile = None
- if self.bz2file:
- self.bz2file.close()
- self.bz2file = None
-
- self.doneDefer.callback(self.hash)
-
- def read(self):
- """Read some data from the stream."""
- if self.outFile.closed:
- return None
-
- # Read data from the stream, deal with the possible deferred
- data = self.stream.read()
- if isinstance(data, defer.Deferred):
- data.addCallbacks(self._write, self._done)
- return data
-
- self._write(data)
- return data
-
- def _write(self, data):
- """Write the stream data to the file and return it for others to use.
-
- Also optionally decompresses it.
- """
- if data is None:
- self._done()
- return data
-
- # Write and hash the streamed data
- self.outFile.write(data)
- self.hash.update(data)
-
- if self.gzfile:
- # Decompress the zlib portion of the file
- if self.gzheader:
- # Remove the gzip header junk
- self.gzheader = False
- new_data = self._remove_gzip_header(data)
- dec_data = self.gzdec.decompress(new_data)
- else:
- dec_data = self.gzdec.decompress(data)
- self.gzfile.write(dec_data)
- if self.bz2file:
- # Decompress the bz2 file
- dec_data = self.bz2dec.decompress(data)
- self.bz2file.write(dec_data)
-
- return data
-
- def _remove_gzip_header(self, data):
- """Remove the gzip header from the zlib compressed data."""
- # Read, check & discard the header fields
- if data[:2] != '\037\213':
- raise IOError, 'Not a gzipped file'
- if ord(data[2]) != 8:
- raise IOError, 'Unknown compression method'
- flag = ord(data[3])
- # modtime = self.fileobj.read(4)
- # extraflag = self.fileobj.read(1)
- # os = self.fileobj.read(1)
-
- skip = 10
- if flag & FEXTRA:
- # Read & discard the extra field
- xlen = ord(data[10])
- xlen = xlen + 256*ord(data[11])
- skip = skip + 2 + xlen
- if flag & FNAME:
- # Read and discard a null-terminated string containing the filename
- while True:
- if not data[skip] or data[skip] == '\000':
- break
- skip += 1
- skip += 1
- if flag & FCOMMENT:
- # Read and discard a null-terminated string containing a comment
- while True:
- if not data[skip] or data[skip] == '\000':
- break
- skip += 1
- skip += 1
- if flag & FHCRC:
- skip += 2 # Read & discard the 16-bit header CRC
-
- return data[skip:]
-
- def close(self):
- """Clean everything up and return None to future reads."""
- self.length = 0
- self._done()
- self.stream.close()
-
-class CacheManager:
- """Manages all downloaded files and requests for cached objects.
-
- @type cache_dir: L{twisted.python.filepath.FilePath}
- @ivar cache_dir: the directory to use for storing all files
- @type other_dirs: C{list} of L{twisted.python.filepath.FilePath}
- @ivar other_dirs: the other directories that have shared files in them
- @type all_dirs: C{list} of L{twisted.python.filepath.FilePath}
- @ivar all_dirs: all the directories that have cached files in them
- @type db: L{db.DB}
- @ivar db: the database to use for tracking files and hashes
- @type manager: L{apt_p2p.AptP2P}
- @ivar manager: the main program object to send requests to
- @type scanning: C{list} of L{twisted.python.filepath.FilePath}
-    @ivar scanning: all the directories that are currently being scanned or waiting to be scanned
- """
-
- def __init__(self, cache_dir, db, other_dirs = [], manager = None):
- """Initialize the instance and remove any untracked files from the DB..
-
- @type cache_dir: L{twisted.python.filepath.FilePath}
- @param cache_dir: the directory to use for storing all files
- @type db: L{db.DB}
- @param db: the database to use for tracking files and hashes
- @type other_dirs: C{list} of L{twisted.python.filepath.FilePath}
- @param other_dirs: the other directories that have shared files in them
- (optional, defaults to only using the cache directory)
- @type manager: L{apt_p2p.AptP2P}
- @param manager: the main program object to send requests to
- (optional, defaults to not calling back with cached files)
- """
- self.cache_dir = cache_dir
- self.other_dirs = other_dirs
- self.all_dirs = self.other_dirs[:]
- self.all_dirs.insert(0, self.cache_dir)
- self.db = db
- self.manager = manager
- self.scanning = []
-
- # Init the database, remove old files
- self.db.removeUntrackedFiles(self.all_dirs)
-
- #{ Scanning directories
- def scanDirectories(self):
- """Scan the cache directories, hashing new and rehashing changed files."""
- assert not self.scanning, "a directory scan is already under way"
- self.scanning = self.all_dirs[:]
- self._scanDirectories()
-
- def _scanDirectories(self, result = None, walker = None):
- """Walk each directory looking for cached files.
-
- @param result: the result of a DHT store request, not used (optional)
- @param walker: the walker to use to traverse the current directory
- (optional, defaults to creating a new walker from the first
- directory in the L{CacheManager.scanning} list)
- """
- # Need to start walking a new directory
- if walker is None:
- # If there are any left, get them
- if self.scanning:
- log.msg('started scanning directory: %s' % self.scanning[0].path)
- walker = self.scanning[0].walk()
- else:
- log.msg('cache directory scan complete')
- return
-
- try:
- # Get the next file in the directory
- file = walker.next()
- except StopIteration:
- # No files left, go to the next directory
- log.msg('done scanning directory: %s' % self.scanning[0].path)
- self.scanning.pop(0)
- reactor.callLater(0, self._scanDirectories)
- return
-
- # If it's not a file ignore it
- if not file.isfile():
- log.msg('entering directory: %s' % file.path)
- reactor.callLater(0, self._scanDirectories, None, walker)
- return
-
- # If it's already properly in the DB, ignore it
- db_status = self.db.isUnchanged(file)
- if db_status:
- log.msg('file is unchanged: %s' % file.path)
- reactor.callLater(0, self._scanDirectories, None, walker)
- return
-
- # Don't hash files in the cache that are not in the DB
- if self.scanning[0] == self.cache_dir:
- if db_status is None:
- log.msg('ignoring unknown cache file: %s' % file.path)
- else:
- log.msg('removing changed cache file: %s' % file.path)
- file.remove()
- reactor.callLater(0, self._scanDirectories, None, walker)
- return
-
- # Otherwise hash it
- log.msg('start hash checking file: %s' % file.path)
- hash = HashObject()
- df = hash.hashInThread(file)
- df.addBoth(self._doneHashing, file, walker)
- df.addErrback(log.err)
-
- def _doneHashing(self, result, file, walker):
- """If successful, add the hashed file to the DB and inform the main program."""
- if isinstance(result, HashObject):
- log.msg('hash check of %s completed with hash: %s' % (file.path, result.hexdigest()))
-
- # Only set a URL if this is a downloaded file
- url = None
- if self.scanning[0] == self.cache_dir:
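-                # file.path begins with '/', so this produces a full
-                # 'http://...' URL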
- url = 'http:/' + file.path[len(self.cache_dir.path):]
-
- # Store the hashed file in the database
- new_hash = self.db.storeFile(file, result.digest())
-
- # Tell the main program to handle the new cache file
- df = self.manager.new_cached_file(file, result, new_hash, url, True)
- if df is None:
- reactor.callLater(0, self._scanDirectories, None, walker)
- else:
- df.addBoth(self._scanDirectories, walker)
- else:
- # Must have returned an error
- log.msg('hash check of %s failed' % file.path)
- log.err(result)
- reactor.callLater(0, self._scanDirectories, None, walker)
-
- #{ Downloading files
- def save_file(self, response, hash, url):
- """Save a downloaded file to the cache and stream it.
-
- @type response: L{twisted.web2.http.Response}
- @param response: the response from the download
- @type hash: L{Hash.HashObject}
- @param hash: the hash object containing the expected hash for the file
- @param url: the URI of the actual mirror request
- @rtype: L{twisted.web2.http.Response}
- @return: the final response from the download
- """
- if response.code != 200:
- log.msg('File was not found (%r): %s' % (response, url))
- return response
-
- log.msg('Returning file: %s' % url)
-
- # Set the destination path for the file
- parsed = urlparse(url)
- destFile = self.cache_dir.preauthChild(parsed[1] + parsed[2])
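-        # e.g. (illustrative URL) 'http://ftp.us.debian.org/debian/dists/sid/Release'
-        # is saved as <cache_dir>/ftp.us.debian.org/debian/dists/sid/Release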
- log.msg('Saving returned %r byte file to cache: %s' % (response.stream.length, destFile.path))
-
- # Make sure there's a free place for the file
- if destFile.exists():
- log.msg('File already exists, removing: %s' % destFile.path)
- destFile.remove()
- elif not destFile.parent().exists():
- destFile.parent().makedirs()
-
- # Determine whether it needs to be decompressed and how
- root, ext = os.path.splitext(destFile.basename())
- if root.lower() in DECOMPRESS_FILES and ext.lower() in DECOMPRESS_EXTS:
- ext = ext.lower()
- decFile = destFile.sibling(root)
- log.msg('Decompressing to: %s' % decFile.path)
- if decFile.exists():
- log.msg('File already exists, removing: %s' % decFile.path)
- decFile.remove()
- else:
- ext = None
- decFile = None
-
- # Create the new stream from the old one.
- orig_stream = response.stream
- response.stream = ProxyFileStream(orig_stream, destFile, hash, ext, decFile)
- response.stream.doneDefer.addCallback(self._save_complete, url, destFile,
- response.headers.getHeader('Last-Modified'),
- decFile)
- response.stream.doneDefer.addErrback(self.save_error, url)
-
- # Return the modified response with the new stream
- return response
-
- def _save_complete(self, hash, url, destFile, modtime = None, decFile = None):
- """Update the modification time and inform the main program.
-
- @type hash: L{Hash.HashObject}
- @param hash: the hash object containing the expected hash for the file
- @param url: the URI of the actual mirror request
- @type destFile: C{twisted.python.FilePath}
- @param destFile: the file where the download was written to
- @type modtime: C{int}
- @param modtime: the modified time of the cached file (seconds since epoch)
- (optional, defaults to not setting the modification time of the file)
- @type decFile: C{twisted.python.FilePath}
- @param decFile: the file where the decompressed download was written to
- (optional, defaults to the file not having been compressed)
- """
- if modtime:
- os.utime(destFile.path, (modtime, modtime))
- if decFile:
- os.utime(decFile.path, (modtime, modtime))
-
- result = hash.verify()
- if result or result is None:
- if result:
- log.msg('Hashes match: %s' % url)
- else:
- log.msg('Hashed file to %s: %s' % (hash.hexdigest(), url))
-
- new_hash = self.db.storeFile(destFile, hash.digest())
-            log.msg('now available: %s' % (url))
-
- if self.manager:
- self.manager.new_cached_file(destFile, hash, new_hash, url)
- if decFile:
- ext_len = len(destFile.path) - len(decFile.path)
- self.manager.new_cached_file(decFile, None, False, url[:-ext_len])
- else:
- log.msg("Hashes don't match %s != %s: %s" % (hash.hexexpected(), hash.hexdigest(), url))
- destFile.remove()
- if decFile:
- decFile.remove()
-
- def save_error(self, failure, url):
- """An error has occurred in downloadign or saving the file."""
- log.msg('Error occurred downloading %s' % url)
- log.err(failure)
- return failure
-
-class TestMirrorManager(unittest.TestCase):
- """Unit tests for the mirror manager."""
-
- timeout = 20
- pending_calls = []
- client = None
-
- def setUp(self):
- self.client = CacheManager(FilePath('/tmp/.apt-p2p'))
-
- def tearDown(self):
- for p in self.pending_calls:
- if p.active():
- p.cancel()
- self.client = None
-
\ No newline at end of file
+++ /dev/null
-
-"""Manage all download requests to a single site."""
-
-from math import exp
-from datetime import datetime, timedelta
-
-from twisted.internet import reactor, defer, protocol
-from twisted.internet.protocol import ClientFactory
-from twisted import version as twisted_version
-from twisted.python import log
-from twisted.web2.client.interfaces import IHTTPClientManager
-from twisted.web2.client.http import ProtocolError, ClientRequest, HTTPClientProtocol
-from twisted.web2 import stream as stream_mod, http_headers
-from twisted.web2 import version as web2_version
-from twisted.trial import unittest
-from zope.interface import implements
-
-from apt_p2p_conf import version
-
-class Peer(ClientFactory):
- """A manager for all HTTP requests to a single peer.
-
- Controls all requests that go to a single peer (host and port).
- This includes buffering requests until they can be sent and reconnecting
- in the event of the connection being closed.
-
- """
-
- implements(IHTTPClientManager)
-
- def __init__(self, host, port=80):
- self.host = host
- self.port = port
- self.busy = False
- self.pipeline = False
- self.closed = True
- self.connecting = False
- self.request_queue = []
- self.response_queue = []
- self.proto = None
- self.connector = None
- self._errors = 0
- self._completed = 0
- self._downloadSpeeds = []
- self._lastResponse = None
- self._responseTimes = []
-
- #{ Manage the request queue
- def connect(self):
- """Connect to the peer."""
- assert self.closed and not self.connecting
- self.connecting = True
- d = protocol.ClientCreator(reactor, HTTPClientProtocol, self).connectTCP(self.host, self.port)
- d.addCallback(self.connected)
-
- def connected(self, proto):
- """Begin processing the queued requests."""
- self.closed = False
- self.connecting = False
- self.proto = proto
- self.processQueue()
-
- def close(self):
- """Close the connection to the peer."""
- if not self.closed:
- self.proto.transport.loseConnection()
-
- def submitRequest(self, request):
- """Add a new request to the queue.
-
- @type request: L{twisted.web2.client.http.ClientRequest}
- @return: deferred that will fire with the completed request
- """
- request.submissionTime = datetime.now()
- request.deferRequest = defer.Deferred()
- self.request_queue.append(request)
- self.processQueue()
- return request.deferRequest
-
- def processQueue(self):
- """Check the queue to see if new requests can be sent to the peer."""
- if not self.request_queue:
- return
- if self.connecting:
- return
- if self.closed:
- self.connect()
- return
- if self.busy and not self.pipeline:
- return
- if self.response_queue and not self.pipeline:
- return
-
- req = self.request_queue.pop(0)
- self.response_queue.append(req)
- req.deferResponse = self.proto.submitRequest(req, False)
- req.deferResponse.addCallbacks(self.requestComplete, self.requestError)
-
- def requestComplete(self, resp):
- """Process a completed request."""
- self._processLastResponse()
- req = self.response_queue.pop(0)
- log.msg('%s of %s completed with code %d' % (req.method, req.uri, resp.code))
- self._completed += 1
- if resp.code >= 400:
- self._errors += 1
- now = datetime.now()
- self._responseTimes.append((now, now - req.submissionTime))
- self._lastResponse = (now, resp.stream.length)
- req.deferRequest.callback(resp)
-
- def requestError(self, error):
- """Process a request that ended with an error."""
- self._processLastResponse()
- req = self.response_queue.pop(0)
- log.msg('Download of %s generated error %r' % (req.uri, error))
- self._completed += 1
- self._errors += 1
- req.deferRequest.errback(error)
-
- def hashError(self, error):
- """Log that a hash error occurred from the peer."""
- log.msg('Hash error from peer (%s, %d): %r' % (self.host, self.port, error))
- self._errors += 1
-
- #{ IHTTPClientManager interface
- def clientBusy(self, proto):
- """Save the busy state."""
- self.busy = True
-
- def clientIdle(self, proto):
- """Try to send a new request."""
- self._processLastResponse()
- self.busy = False
- self.processQueue()
-
- def clientPipelining(self, proto):
- """Try to send a new request."""
- self.pipeline = True
- self.processQueue()
-
- def clientGone(self, proto):
- """Mark sent requests as errors."""
- self._processLastResponse()
- for req in self.response_queue:
- req.deferRequest.errback(ProtocolError('lost connection'))
- self.busy = False
- self.pipeline = False
- self.closed = True
- self.connecting = False
- self.response_queue = []
- self.proto = None
- if self.request_queue:
- self.processQueue()
-
- #{ Downloading request interface
- def setCommonHeaders(self):
- """Get the common HTTP headers for all requests."""
- headers = http_headers.Headers()
- headers.setHeader('Host', self.host)
- headers.setHeader('User-Agent', 'apt-p2p/%s (twisted/%s twisted.web2/%s)' %
- (version.short(), twisted_version.short(), web2_version.short()))
- return headers
-
- def get(self, path, method="GET", modtime=None):
- """Add a new request to the queue.
-
- @type path: C{string}
- @param path: the path to request from the peer
- @type method: C{string}
- @param method: the HTTP method to use, 'GET' or 'HEAD'
- (optional, defaults to 'GET')
- @type modtime: C{int}
- @param modtime: the modification time to use for an 'If-Modified-Since'
- header, as seconds since the epoch
- (optional, defaults to not sending that header)
- """
- headers = self.setCommonHeaders()
- if modtime:
- headers.setHeader('If-Modified-Since', modtime)
- return self.submitRequest(ClientRequest(method, path, headers, None))
-
- def getRange(self, path, rangeStart, rangeEnd, method="GET"):
- """Add a new request with a Range header to the queue.
-
- @type path: C{string}
- @param path: the path to request from the peer
- @type rangeStart: C{int}
- @param rangeStart: the byte to begin the request at
- @type rangeEnd: C{int}
- @param rangeEnd: the byte to end the request at (inclusive)
- @type method: C{string}
- @param method: the HTTP method to use, 'GET' or 'HEAD'
- (optional, defaults to 'GET')
- """
- headers = self.setCommonHeaders()
- headers.setHeader('Range', ('bytes', [(rangeStart, rangeEnd)]))
- return self.submitRequest(ClientRequest(method, path, headers, None))
-
- #{ Peer information
- def isIdle(self):
- """Check whether the peer is idle or not."""
- return not self.busy and not self.request_queue and not self.response_queue
-
- def _processLastResponse(self):
- """Save the download time of the last request for speed calculations."""
- if self._lastResponse is not None:
- now = datetime.now()
- self._downloadSpeeds.append((now, now - self._lastResponse[0], self._lastResponse[1]))
- self._lastResponse = None
-
- def downloadSpeed(self):
- """Gets the latest average download speed for the peer.
-
- The average is over the last 10 responses that occurred in the last hour.
- """
- total_time = 0.0
- total_download = 0
- now = datetime.now()
- while self._downloadSpeeds and (len(self._downloadSpeeds) > 10 or
- now - self._downloadSpeeds[0][0] > timedelta(seconds=3600)):
- self._downloadSpeeds.pop(0)
-
- # If there are none, then you get 0
- if not self._downloadSpeeds:
- return 0.0
-
- for download in self._downloadSpeeds:
- total_time += download[1].days*86400.0 + download[1].seconds + download[1].microseconds/1000000.0
- total_download += download[2]
-
- return total_download / total_time
-
- def responseTime(self):
- """Gets the latest average response time for the peer.
-
- Response time is the time from receiving the request, to the time
- the download begins. The average is over the last 10 responses that
- occurred in the last hour.
- """
- total_response = 0.0
- now = datetime.now()
- while self._responseTimes and (len(self._responseTimes) > 10 or
- now - self._responseTimes[0][0] > timedelta(seconds=3600)):
- self._responseTimes.pop(0)
-
- # If there are none, give it the benefit of the doubt
- if not self._responseTimes:
- return 0.0
-
- for response in self._responseTimes:
- total_response += response[1].days*86400.0 + response[1].seconds + response[1].microseconds/1000000.0
-
- return total_response / len(self._responseTimes)
-
- def rank(self, fastest):
- """Determine the ranking value for the peer.
-
- The ranking value is composed of 5 numbers:
- - 1 if a connection to the peer is open, 0.9 otherwise
- - 1 if there are no pending requests, to 0 if there are a maximum
- - 1 if the peer is the fastest of all peers, to 0 if the speed is 0
- - 1 if all requests are good, 0 if all produced errors
- - an exponentially decreasing number based on the response time
- """
- rank = 1.0
- if self.closed:
- rank *= 0.9
- rank *= (max(0.0, 10.0 - len(self.request_queue) - len(self.response_queue))) / 10.0
- if fastest > 0.0:
- rank *= min(1.0, self.downloadSpeed() / fastest)
- if self._completed:
- rank *= max(0.0, 1.0 - float(self._errors) / self._completed)
- rank *= exp(-self.responseTime() / 5.0)
- return rank
-
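-# A worked example of the ranking arithmetic above (all values are
-# hypothetical): an open connection (1.0) with 2 queued requests (0.8),
-# running at half the fastest peer's speed (0.5), with 1 error out of 10
-# completed requests (0.9) and a 1 second response time (exp(-1/5.0),
-# about 0.82) gives a rank of roughly 1.0 * 0.8 * 0.5 * 0.9 * 0.82 = 0.29.
-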
-class TestClientManager(unittest.TestCase):
- """Unit tests for the Peer."""
-
- client = None
- pending_calls = []
-
- def gotResp(self, resp, num, expect):
- self.failUnless(resp.code >= 200 and resp.code < 300, "Got a non-200 response: %r" % resp.code)
- if expect is not None:
- self.failUnless(resp.stream.length == expect, "Length was incorrect, got %r, expected %r" % (resp.stream.length, expect))
- def print_(n):
- pass
- def printdone(n):
- pass
- stream_mod.readStream(resp.stream, print_).addCallback(printdone)
-
- def test_download(self):
- """Tests a normal download."""
- host = 'www.ietf.org'
- self.client = Peer(host, 80)
- self.timeout = 10
-
- d = self.client.get('/rfc/rfc0013.txt')
- d.addCallback(self.gotResp, 1, 1070)
- return d
-
- def test_head(self):
- """Tests a 'HEAD' request."""
- host = 'www.ietf.org'
- self.client = Peer(host, 80)
- self.timeout = 10
-
- d = self.client.get('/rfc/rfc0013.txt', "HEAD")
- d.addCallback(self.gotResp, 1, 0)
- return d
-
- def test_multiple_downloads(self):
- """Tests multiple downloads with queueing and connection closing."""
- host = 'www.ietf.org'
- self.client = Peer(host, 80)
- self.timeout = 120
- lastDefer = defer.Deferred()
-
- def newRequest(path, num, expect, last=False):
- d = self.client.get(path)
- d.addCallback(self.gotResp, num, expect)
- if last:
- d.addBoth(lastDefer.callback)
-
- # 3 quick requests
- newRequest("/rfc/rfc0006.txt", 1, 1776)
- newRequest("/rfc/rfc2362.txt", 2, 159833)
- newRequest("/rfc/rfc0801.txt", 3, 40824)
-
- # This one will probably be queued
- self.pending_calls.append(reactor.callLater(1, newRequest, '/rfc/rfc0013.txt', 4, 1070))
-
- # Connection should still be open, but idle
- self.pending_calls.append(reactor.callLater(10, newRequest, '/rfc/rfc0022.txt', 5, 4606))
-
- # Connection should be closed
- self.pending_calls.append(reactor.callLater(30, newRequest, '/rfc/rfc0048.txt', 6, 41696))
- self.pending_calls.append(reactor.callLater(31, newRequest, '/rfc/rfc3261.txt', 7, 647976))
- self.pending_calls.append(reactor.callLater(32, newRequest, '/rfc/rfc0014.txt', 8, 27))
- self.pending_calls.append(reactor.callLater(32, newRequest, '/rfc/rfc0001.txt', 9, 21088))
-
- # Now it should definitely be closed
- self.pending_calls.append(reactor.callLater(62, newRequest, '/rfc/rfc2801.txt', 0, 598794, True))
- return lastDefer
-
- def test_multiple_quick_downloads(self):
- """Tests lots of multiple downloads with queueing."""
- host = 'www.ietf.org'
- self.client = Peer(host, 80)
- self.timeout = 30
- lastDefer = defer.Deferred()
-
- def newRequest(path, num, expect, last=False):
- d = self.client.get(path)
- d.addCallback(self.gotResp, num, expect)
- if last:
- d.addBoth(lastDefer.callback)
-
- newRequest("/rfc/rfc0006.txt", 1, 1776)
- newRequest("/rfc/rfc2362.txt", 2, 159833)
- newRequest("/rfc/rfc0801.txt", 3, 40824)
- self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0013.txt', 4, 1070))
- self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0022.txt', 5, 4606))
- self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0048.txt', 6, 41696))
- self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc3261.txt', 7, 647976))
- self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0014.txt', 8, 27))
- self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0001.txt', 9, 21088))
- self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc2801.txt', 0, 598794, True))
- return lastDefer
-
- def checkInfo(self):
- log.msg('Rank is: %r' % self.client.rank(250.0*1024))
- log.msg('Download speed is: %r' % self.client.downloadSpeed())
- log.msg('Response Time is: %r' % self.client.responseTime())
-
- def test_peer_info(self):
- """Test retrieving the peer info during a download."""
- host = 'www.ietf.org'
- self.client = Peer(host, 80)
- self.timeout = 120
- lastDefer = defer.Deferred()
-
- def newRequest(path, num, expect, last=False):
- d = self.client.get(path)
- d.addCallback(self.gotResp, num, expect)
- if last:
- d.addBoth(lastDefer.callback)
-
- newRequest("/rfc/rfc0006.txt", 1, 1776)
- newRequest("/rfc/rfc2362.txt", 2, 159833)
- newRequest("/rfc/rfc0801.txt", 3, 40824)
- self.pending_calls.append(reactor.callLater(1, newRequest, '/rfc/rfc0013.txt', 4, 1070))
- self.pending_calls.append(reactor.callLater(10, newRequest, '/rfc/rfc0022.txt', 5, 4606))
- self.pending_calls.append(reactor.callLater(30, newRequest, '/rfc/rfc0048.txt', 6, 41696))
- self.pending_calls.append(reactor.callLater(31, newRequest, '/rfc/rfc3261.txt', 7, 647976))
- self.pending_calls.append(reactor.callLater(32, newRequest, '/rfc/rfc0014.txt', 8, 27))
- self.pending_calls.append(reactor.callLater(32, newRequest, '/rfc/rfc0001.txt', 9, 21088))
- self.pending_calls.append(reactor.callLater(62, newRequest, '/rfc/rfc2801.txt', 0, 598794, True))
-
- for i in xrange(2, 122, 2):
- self.pending_calls.append(reactor.callLater(i, self.checkInfo))
-
- return lastDefer
-
- def test_range(self):
- """Test a Range request."""
- host = 'www.ietf.org'
- self.client = Peer(host, 80)
- self.timeout = 10
-
- d = self.client.getRange('/rfc/rfc0013.txt', 100, 199)
- d.addCallback(self.gotResp, 1, 100)
- return d
-
- def tearDown(self):
- for p in self.pending_calls:
- if p.active():
- p.cancel()
- self.pending_calls = []
- if self.client:
- self.client.close()
- self.client = None
+++ /dev/null
-
-"""Serve local requests from apt and remote requests from peers."""
-
-from urllib import unquote_plus
-from binascii import b2a_hex
-
-from twisted.python import log
-from twisted.internet import defer
-from twisted.web2 import server, http, resource, channel, stream
-from twisted.web2 import static, http_headers, responsecode
-
-from policies import ThrottlingFactory
-from apt_p2p_Khashmir.bencode import bencode
-
-class FileDownloader(static.File):
- """Modified to make it suitable for apt requests.
-
- Tries to find requests in the cache. Found files are first checked for
- freshness before being sent. Requests for unfound and stale files are
- forwarded to the main program for downloading.
-
- @type manager: L{apt_p2p.AptP2P}
- @ivar manager: the main program to query
- """
-
- def __init__(self, path, manager, defaultType="text/plain", ignoredExts=(), processors=None, indexNames=None):
- self.manager = manager
- super(FileDownloader, self).__init__(path, defaultType, ignoredExts, processors, indexNames)
-
- def renderHTTP(self, req):
- log.msg('Got request for %s from %s' % (req.uri, req.remoteAddr))
- resp = super(FileDownloader, self).renderHTTP(req)
- if isinstance(resp, defer.Deferred):
- resp.addCallback(self._renderHTTP_done, req)
- else:
- resp = self._renderHTTP_done(resp, req)
- return resp
-
- def _renderHTTP_done(self, resp, req):
- log.msg('Initial response to %s: %r' % (req.uri, resp))
-
- if self.manager:
- path = 'http:/' + req.uri
- if resp.code >= 200 and resp.code < 400:
- return self.manager.check_freshness(req, path, resp.headers.getHeader('Last-Modified'), resp)
-
- log.msg('Not found, trying other methods for %s' % req.uri)
- return self.manager.get_resp(req, path)
-
- return resp
-
- def createSimilarFile(self, path):
- return self.__class__(path, self.manager, self.defaultType, self.ignoredExts,
- self.processors, self.indexNames[:])
-
-class FileUploaderStream(stream.FileStream):
- """Modified to make it suitable for streaming to peers.
-
- Streams the file in small chunks to make it easier to throttle the
- streaming to peers.
-
- @ivar CHUNK_SIZE: the size of chunks of data to send at a time
- """
-
- CHUNK_SIZE = 4*1024
-
- def read(self, sendfile=False):
- if self.f is None:
- return None
-
- length = self.length
- if length == 0:
- self.f = None
- return None
-
- # Remove the SendFileBuffer and mmap use, just use string reads and writes
-
- readSize = min(length, self.CHUNK_SIZE)
-
- self.f.seek(self.start)
- b = self.f.read(readSize)
- bytesRead = len(b)
- if not bytesRead:
- raise RuntimeError("Ran out of data reading file %r, expected %d more bytes" % (self.f, length))
- else:
- self.length -= bytesRead
- self.start += bytesRead
- return b
-
-
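-# A minimal sketch of how this chunked stream might be consumed
-# (illustrative only; 'some_file' and 'transport' are hypothetical):
-#
-#   f = open('some_file')
-#   s = FileUploaderStream(f, 0, os.path.getsize('some_file'))
-#   data = s.read()
-#   while data is not None:
-#       transport.write(data)   # each write is at most CHUNK_SIZE bytes
-#       data = s.read()
-#
-# Keeping chunks small lets the ThrottlingFactory pause between writes.
-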
-class FileUploader(static.File):
- """Modified to make it suitable for peer requests.
-
- Uses the modified L{FileUploaderStream} to stream the file for throttling,
- and doesn't do any listing of directory contents.
- """
-
- def render(self, req):
- if not self.fp.exists():
- return responsecode.NOT_FOUND
-
- if self.fp.isdir():
- # Don't try to render a directory listing
- return responsecode.NOT_FOUND
-
- try:
- f = self.fp.open()
- except IOError, e:
- import errno
- if e[0] == errno.EACCES:
- return responsecode.FORBIDDEN
- elif e[0] == errno.ENOENT:
- return responsecode.NOT_FOUND
- else:
- raise
-
- response = http.Response()
- # Use the modified FileStream
- response.stream = FileUploaderStream(f, 0, self.fp.getsize())
-
- for (header, value) in (
- ("content-type", self.contentType()),
- ("content-encoding", self.contentEncoding()),
- ):
- if value is not None:
- response.headers.setHeader(header, value)
-
- return response
-
-class TopLevel(resource.Resource):
- """The HTTP server for all requests, both from peers and apt.
-
- @type directory: L{twisted.python.filepath.FilePath}
- @ivar directory: the directory to check for cached files
- @type db: L{db.DB}
- @ivar db: the database to use for looking up files and hashes
- @type manager: L{apt_p2p.AptP2P}
- @ivar manager: the main program object to send requests to
- @type factory: L{twisted.web2.channel.HTTPFactory} or L{policies.ThrottlingFactory}
- @ivar factory: the factory to use to serve HTTP requests
-
- """
-
- addSlash = True
-
- def __init__(self, directory, db, manager):
- """Initialize the instance.
-
- @type directory: L{twisted.python.filepath.FilePath}
- @param directory: the directory to check for cached files
- @type db: L{db.DB}
- @param db: the database to use for looking up files and hashes
- @type manager: L{apt_p2p.AptP2P}
- @param manager: the main program object to send requests to
- """
- self.directory = directory
- self.db = db
- self.manager = manager
- self.factory = None
-
- def getHTTPFactory(self):
- """Initialize and get the factory for this HTTP server."""
- if self.factory is None:
- self.factory = channel.HTTPFactory(server.Site(self),
- **{'maxPipeline': 10,
- 'betweenRequestsTimeOut': 60})
- self.factory = ThrottlingFactory(self.factory, writeLimit = 30*1024)
- return self.factory
-
- def render(self, ctx):
- """Render a web page with descriptive statistics."""
- return http.Response(
- 200,
- {'content-type': http_headers.MimeType('text', 'html')},
- """<html><body>
- <h2>Statistics</h2>
- <p>TODO: eventually some stats will be shown here.</body></html>""")
-
- def locateChild(self, request, segments):
- """Process the incoming request."""
- log.msg('Got HTTP request for %s from %s' % (request.uri, request.remoteAddr))
- name = segments[0]
-
- # If the request is for a shared file (from a peer)
- if name == '~':
- if len(segments) != 2:
- log.msg('Got a malformed request from %s' % request.remoteAddr)
- return None, ()
-
- # Find the file in the database
- hash = unquote_plus(segments[1])
- files = self.db.lookupHash(hash)
- if files:
- # If it is a file, return it
- if 'path' in files[0]:
- log.msg('Sharing %s with %s' % (files[0]['path'].path, request.remoteAddr))
- return FileUploader(files[0]['path'].path), ()
- else:
- # It's not for a file, but for a piece string, so return that
- log.msg('Sending torrent string %s to %s' % (b2a_hex(hash), request.remoteAddr))
- return static.Data(bencode({'t': files[0]['pieces']}), 'application/x-bencoded'), ()
- else:
- log.msg('Hash could not be found in database: %s' % hash)
-
- # Only local requests (apt) get past this point
- if request.remoteAddr.host != "127.0.0.1":
- log.msg('Blocked illegal access to %s from %s' % (request.uri, request.remoteAddr))
- return None, ()
-
- if len(name) > 1:
- # It's a request from apt
- return FileDownloader(self.directory.path, self.manager), segments[0:]
- else:
- # Will render the statistics page
- return self, ()
-
- log.msg('Got a malformed request for "%s" from %s' % (request.uri, request.remoteAddr))
- return None, ()
-
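-# To summarize the routing above (example paths only):
-#   /~/<hash>        -> FileUploader for a file, or bencoded piece data,
-#                       for requests from other peers
-#   /<mirror>/<path> -> FileDownloader, for requests from apt
-#                       (only allowed from 127.0.0.1)
-#   /                -> the statistics page rendered by this resource
-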
-if __name__ == '__builtin__':
- # Running from twistd -ny HTTPServer.py
- # Then test with:
- # wget -S 'http://localhost:18080/~/whatever'
- # wget -S 'http://localhost:18080/~/pieces'
-
- import os.path
- from twisted.python.filepath import FilePath
-
- class DB:
- def lookupHash(self, hash):
- if hash == 'pieces':
- return [{'pieces': 'abcdefghij0123456789\xca\xec\xb8\x0c\x00\xe7\x07\xf8~])\x8f\x9d\xe5_B\xff\x1a\xc4!'}]
- return [{'path': FilePath(os.path.expanduser('~/school/optout'))}]
-
- t = TopLevel(FilePath(os.path.expanduser('~')), DB(), None)
- factory = t.getHTTPFactory()
-
- # Standard twisted application Boilerplate
- from twisted.application import service, strports
- application = service.Application("demoserver")
- s = strports.service('tcp:18080', factory)
- s.setServiceParent(application)
+++ /dev/null
-
-"""Hash and store hash information for a file.
-
-@var PIECE_SIZE: the piece size to use for hashing pieces of files
-
-"""
-
-from binascii import b2a_hex, a2b_hex
-import sys
-
-from twisted.internet import threads, defer
-from twisted.trial import unittest
-
-PIECE_SIZE = 512*1024
-
-class HashError(ValueError):
- """An error has occurred while hashing a file."""
-
-class HashObject:
- """Manages hashes and hashing for a file.
-
- @ivar ORDER: the priority ordering of hashes, and how to extract them
-
- """
-
- ORDER = [ {'name': 'sha1',
- 'length': 20,
- 'AptPkgRecord': 'SHA1Hash',
- 'AptSrcRecord': False,
- 'AptIndexRecord': 'SHA1',
- 'old_module': 'sha',
- 'hashlib_func': 'sha1',
- },
- {'name': 'sha256',
- 'length': 32,
- 'AptPkgRecord': 'SHA256Hash',
- 'AptSrcRecord': False,
- 'AptIndexRecord': 'SHA256',
- 'hashlib_func': 'sha256',
- },
- {'name': 'md5',
- 'length': 16,
- 'AptPkgRecord': 'MD5Hash',
- 'AptSrcRecord': True,
- 'AptIndexRecord': 'MD5SUM',
- 'old_module': 'md5',
- 'hashlib_func': 'md5',
- },
- ]
-
- def __init__(self, digest = None, size = None, pieces = ''):
- """Initialize the hash object."""
- self.hashTypeNum = 0 # Use the first if nothing else matters
- if sys.version_info < (2, 5):
- # sha256 is not available in python before 2.5, remove it
- for hashType in self.ORDER:
- if hashType['name'] == 'sha256':
- del self.ORDER[self.ORDER.index(hashType)]
- break
-
- self.expHash = None
- self.expHex = None
- self.expSize = None
- self.expNormHash = None
- self.fileHasher = None
- self.pieceHasher = None
- self.fileHash = digest
- self.pieceHash = [pieces[x:x+self.ORDER[self.hashTypeNum]['length']]
- for x in xrange(0, len(pieces), self.ORDER[self.hashTypeNum]['length'])]
- self.size = size
- self.fileHex = None
- self.fileNormHash = None
- self.done = True
- self.result = None
-
- #{ Hashing data
- def new(self, force = False):
- """Generate a new hashing object suitable for hashing a file.
-
- @param force: set to True to force creating a new object even if
- the hash has been verified already
- """
- if self.result is None or force:
- self.result = None
- self.done = False
- self.fileHasher = self._new()
- self.pieceHasher = None
- self.fileHash = None
- self.pieceHash = []
- self.size = 0
- self.fileHex = None
- self.fileNormHash = None
-
- def _new(self):
- """Create a new hashing object according to the hash type."""
- if sys.version_info < (2, 5):
- mod = __import__(self.ORDER[self.hashTypeNum]['old_module'], globals(), locals(), [])
- return mod.new()
- else:
- import hashlib
- func = getattr(hashlib, self.ORDER[self.hashTypeNum]['hashlib_func'])
- return func()
-
- def update(self, data):
- """Add more data to the file hasher."""
- if self.result is None:
- if self.done:
- raise HashError, "Already done, you can't add more data after calling digest() or verify()"
- if self.fileHasher is None:
- raise HashError, "file hasher not initialized"
-
- if not self.pieceHasher and self.size + len(data) > PIECE_SIZE:
- # Hash up to the piece size
- self.fileHasher.update(data[:(PIECE_SIZE - self.size)])
- data = data[(PIECE_SIZE - self.size):]
- self.size = PIECE_SIZE
-
- # Save the first piece digest and initialize a new piece hasher
- self.pieceHash.append(self.fileHasher.digest())
- self.pieceHasher = self._new()
-
- if self.pieceHasher:
- # Loop in case the data contains multiple pieces
- piece_size = self.size % PIECE_SIZE
- while piece_size + len(data) > PIECE_SIZE:
- # Save the piece hash and start a new one
- self.pieceHasher.update(data[:(PIECE_SIZE - piece_size)])
- self.pieceHash.append(self.pieceHasher.digest())
- self.pieceHasher = self._new()
-
- # Don't forget to hash the data normally
- self.fileHasher.update(data[:(PIECE_SIZE - piece_size)])
- data = data[(PIECE_SIZE - piece_size):]
- self.size += PIECE_SIZE - piece_size
- piece_size = self.size % PIECE_SIZE
-
- # Hash any remaining data
- self.pieceHasher.update(data)
-
- self.fileHasher.update(data)
- self.size += len(data)
-
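- # A small sketch of the piece handling above (data sizes are arbitrary):
- # one and a half pieces, fed in two updates, yield two piece digests once
- # digest() finalizes the partial second piece:
- #
- #   h = HashObject()
- #   h.new()
- #   h.update('x' * PIECE_SIZE)         # exactly fills the first piece
- #   h.update('x' * (PIECE_SIZE / 2))   # starts the second, partial piece
- #   h.digest()                         # appends the final piece digest
- #   assert len(h.pieceDigests()) == 2
-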
- def hashInThread(self, file):
- """Hashes a file in a separate thread, returning a deferred that will callback with the result."""
- file.restat(False)
- if not file.exists():
- df = defer.Deferred()
- df.errback(HashError("file not found"))
- return df
-
- df = threads.deferToThread(self._hashInThread, file)
- return df
-
- def _hashInThread(self, file):
- """Hashes a file, returning itself as the result."""
- f = file.open()
- self.new(force = True)
- data = f.read(4096)
- while data:
- self.update(data)
- data = f.read(4096)
- self.digest()
- return self
-
- #{ Checking hashes of data
- def pieceDigests(self):
- """Get the piece hashes of the added file data."""
- self.digest()
- return self.pieceHash
-
- def digest(self):
- """Get the hash of the added file data."""
- if self.fileHash is None:
- if self.fileHasher is None:
- raise HashError, "you must hash some data first"
- self.fileHash = self.fileHasher.digest()
- self.done = True
-
- # Save the last piece hash
- if self.pieceHasher:
- self.pieceHash.append(self.pieceHasher.digest())
- return self.fileHash
-
- def hexdigest(self):
- """Get the hash of the added file data in hex format."""
- if self.fileHex is None:
- self.fileHex = b2a_hex(self.digest())
- return self.fileHex
-
- def verify(self):
- """Verify that the added file data hash matches the expected hash."""
- if self.result is None and self.fileHash is not None and self.expHash is not None:
- self.result = (self.fileHash == self.expHash and self.size == self.expSize)
- return self.result
-
- #{ Expected hash
- def expected(self):
- """Get the expected hash."""
- return self.expHash
-
- def hexexpected(self):
- """Get the expected hash in hex format."""
- if self.expHex is None and self.expHash is not None:
- self.expHex = b2a_hex(self.expHash)
- return self.expHex
-
- #{ Setting the expected hash
- def set(self, hashType, hashHex, size):
- """Initialize the hash object.
-
- @param hashType: must be one of the dictionaries from L{ORDER}
- """
- self.hashTypeNum = self.ORDER.index(hashType) # error if not found
- self.expHex = hashHex
- self.expSize = int(size)
- self.expHash = a2b_hex(self.expHex)
-
- def setFromIndexRecord(self, record):
- """Set the hash from the cache of index file records.
-
- @type record: C{dictionary}
- @param record: keys are hash types, values are tuples of (hash, size)
- """
- for hashType in self.ORDER:
- result = record.get(hashType['AptIndexRecord'], None)
- if result:
- self.set(hashType, result[0], result[1])
- return True
- return False
-
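- # A hypothetical record argument, as described in the docstring above;
- # the SHA1 entry wins because it comes first in ORDER:
- #
- #   record = {'SHA1': ('3bba0a5d97b7946ad2632002bf9caefe2cb18e00', 1070),
- #             'MD5SUM': ('6b5abdd30d7ed80edd229f9071d8c23c', 1070)}
- #   h = HashObject()
- #   h.setFromIndexRecord(record)   # returns True, using the SHA1 hash
-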
- def setFromPkgRecord(self, record, size):
- """Set the hash from Apt's binary packages cache.
-
- @param record: whatever is returned by apt_pkg.GetPkgRecords()
- """
- for hashType in self.ORDER:
- hashHex = getattr(record, hashType['AptPkgRecord'], None)
- if hashHex:
- self.set(hashType, hashHex, size)
- return True
- return False
-
- def setFromSrcRecord(self, record):
- """Set the hash from Apt's source package records cache.
-
- Currently very simple since Apt only tracks MD5 hashes of source files.
-
- @type record: (C{string}, C{int}, C{string})
- @param record: the hash, size and path of the source file
- """
- for hashType in self.ORDER:
- if hashType['AptSrcRecord']:
- self.set(hashType, record[0], record[1])
- return True
- return False
-
-class TestHashObject(unittest.TestCase):
- """Unit tests for the hash objects."""
-
- timeout = 5
- if sys.version_info < (2, 4):
- skip = "skippingme"
-
- def test_failure(self):
- """Tests that the hash object fails when treated badly."""
- h = HashObject()
- h.set(h.ORDER[0], b2a_hex('12345678901234567890'), '0')
- self.failUnlessRaises(HashError, h.digest)
- self.failUnlessRaises(HashError, h.hexdigest)
- self.failUnlessRaises(HashError, h.update, 'gfgf')
-
- def test_pieces(self):
- """Tests the hashing of large files into pieces."""
- h = HashObject()
- h.new()
- h.update('1234567890'*120*1024)
- self.failUnless(h.digest() == '1(j\xd2q\x0b\n\x91\xd2\x13\x90\x15\xa3E\xcc\xb0\x8d.\xc3\xc5')
- pieces = h.pieceDigests()
- self.failUnless(len(pieces) == 3)
- self.failUnless(pieces[0] == ',G \xd8\xbbPl\xf1\xa3\xa0\x0cW\n\xe6\xe6a\xc9\x95/\xe5')
- self.failUnless(pieces[1] == '\xf6V\xeb/\xa8\xad[\x07Z\xf9\x87\xa4\xf5w\xdf\xe1|\x00\x8e\x93')
- self.failUnless(pieces[2] == 'M[\xbf\xee\xaa+\x19\xbaV\xf699\r\x17o\xcb\x8e\xcfP\x19')
- h.new(True)
- for i in xrange(120*1024):
- h.update('1234567890')
- pieces = h.pieceDigests()
- self.failUnless(h.digest() == '1(j\xd2q\x0b\n\x91\xd2\x13\x90\x15\xa3E\xcc\xb0\x8d.\xc3\xc5')
- self.failUnless(len(pieces) == 3)
- self.failUnless(pieces[0] == ',G \xd8\xbbPl\xf1\xa3\xa0\x0cW\n\xe6\xe6a\xc9\x95/\xe5')
- self.failUnless(pieces[1] == '\xf6V\xeb/\xa8\xad[\x07Z\xf9\x87\xa4\xf5w\xdf\xe1|\x00\x8e\x93')
- self.failUnless(pieces[2] == 'M[\xbf\xee\xaa+\x19\xbaV\xf699\r\x17o\xcb\x8e\xcfP\x19')
-
- def test_sha1(self):
- """Test hashing using the SHA1 hash."""
- h = HashObject()
- found = False
- for hashType in h.ORDER:
- if hashType['name'] == 'sha1':
- found = True
- break
- self.failUnless(found == True)
- h.set(hashType, '3bba0a5d97b7946ad2632002bf9caefe2cb18e00', '19')
- h.new()
- h.update('apt-p2p is the best')
- self.failUnless(h.hexdigest() == '3bba0a5d97b7946ad2632002bf9caefe2cb18e00')
- self.failUnlessRaises(HashError, h.update, 'gfgf')
- self.failUnless(h.verify() == True)
-
- def test_md5(self):
- """Test hashing using the MD5 hash."""
- h = HashObject()
- found = False
- for hashType in h.ORDER:
- if hashType['name'] == 'md5':
- found = True
- break
- self.failUnless(found == True)
- h.set(hashType, '6b5abdd30d7ed80edd229f9071d8c23c', '19')
- h.new()
- h.update('apt-p2p is the best')
- self.failUnless(h.hexdigest() == '6b5abdd30d7ed80edd229f9071d8c23c')
- self.failUnlessRaises(HashError, h.update, 'gfgf')
- self.failUnless(h.verify() == True)
-
- def test_sha256(self):
- """Test hashing using the SHA256 hash."""
- h = HashObject()
- found = False
- for hashType in h.ORDER:
- if hashType['name'] == 'sha256':
- found = True
- break
- self.failUnless(found == True)
- h.set(hashType, '47f2238a30a0340faa2bf01a9bdc42ba77b07b411cda1e24cd8d7b5c4b7d82a7', '19')
- h.new()
- h.update('apt-p2p is the best')
- self.failUnless(h.hexdigest() == '47f2238a30a0340faa2bf01a9bdc42ba77b07b411cda1e24cd8d7b5c4b7d82a7')
- self.failUnlessRaises(HashError, h.update, 'gfgf')
- self.failUnless(h.verify() == True)
-
- if sys.version_info < (2, 5):
- test_sha256.skip = "SHA256 hashes are not supported by Python until version 2.5"
+++ /dev/null
-
-"""Manage the multiple mirrors that may be requested.
-
-@var aptpkg_dir: the name of the directory to use for mirror files
-"""
-
-from urlparse import urlparse
-import os
-
-from twisted.python import log
-from twisted.python.filepath import FilePath
-from twisted.internet import defer
-from twisted.trial import unittest
-from twisted.web2.http import splitHostPort
-
-from AptPackages import AptPackages
-
-aptpkg_dir='apt-packages'
-
-class MirrorError(Exception):
- """Exception raised when there's a problem with the mirror."""
-
-class MirrorManager:
- """Manages all requests for mirror information.
-
- @type cache_dir: L{twisted.python.filepath.FilePath}
- @ivar cache_dir: the directory to use for storing all files
- @type unload_delay: C{int}
- @ivar unload_delay: the time to wait before unloading the apt cache
- @type apt_caches: C{dictionary}
- @ivar apt_caches: the available mirrors
- """
-
- def __init__(self, cache_dir, unload_delay):
- self.cache_dir = cache_dir
- self.unload_delay = unload_delay
- self.apt_caches = {}
-
- def extractPath(self, url):
- """Break the full URI down into the site, base directory and path.
-
- Site is the host and port of the mirror. Base directory is the
- path to the root of the mirror archive (usually just '/debian').
- Path is the remaining path to get to the file.
-
- E.g. http://ftp.debian.org/debian/dists/sid/binary-i386/Packages.bz2
- would return ('ftp.debian.org:80', '/debian',
- '/dists/sid/binary-i386/Packages.bz2').
-
- @param url: the URI of the file's location on the mirror
- @rtype: (C{string}, C{string}, C{string})
- @return: the site, base directory and path to the file
- """
- # Extract the host and port
- parsed = urlparse(url)
- host, port = splitHostPort(parsed[0], parsed[1])
- site = host + ":" + str(port)
- path = parsed[2]
-
- # Try to find the base directory (most can be found this way)
- i = max(path.rfind('/dists/'), path.rfind('/pool/'))
- if i >= 0:
- baseDir = path[:i]
- path = path[i:]
- else:
- # Uh oh, this is not good
- log.msg("Couldn't find a good base directory for path: %s" % (site + path))
-
- # Try to find an existing cache that starts with this one
- # (fallback to using an empty base directory)
- baseDir = ''
- if site in self.apt_caches:
- longest_match = 0
- for base in self.apt_caches[site]:
- base_match = ''
- for dirs in path.split('/'):
- if base.startswith(base_match + '/' + dirs):
- base_match += '/' + dirs
- else:
- break
- if len(base_match) > longest_match:
- longest_match = len(base_match)
- baseDir = base_match
- log.msg("Settled on baseDir: %s" % baseDir)
-
- return site, baseDir, path
-
- def init(self, site, baseDir):
- """Make sure an L{AptPackages} exists for this mirror."""
- if site not in self.apt_caches:
- self.apt_caches[site] = {}
-
- if baseDir not in self.apt_caches[site]:
- site_cache = self.cache_dir.child(aptpkg_dir).child('mirrors').child(site + baseDir.replace('/', '_'))
- site_cache.makedirs()
- self.apt_caches[site][baseDir] = AptPackages(site_cache, self.unload_delay)
-
- def updatedFile(self, url, file_path):
- """A file in the mirror has changed or been added.
-
- @see: L{AptPackages.PackageFileList.update_file}
- """
- site, baseDir, path = self.extractPath(url)
- self.init(site, baseDir)
- self.apt_caches[site][baseDir].file_updated(path, file_path)
-
- def findHash(self, url):
- """Find the hash for a given url.
-
- @param url: the URI of the file's location on the mirror
- @rtype: L{twisted.internet.defer.Deferred}
- @return: a deferred that will fire with the returned L{Hash.HashObject}
- """
- site, baseDir, path = self.extractPath(url)
- if site in self.apt_caches and baseDir in self.apt_caches[site]:
- return self.apt_caches[site][baseDir].findHash(path)
- d = defer.Deferred()
- d.errback(MirrorError("Site Not Found"))
- return d
-
- def cleanup(self):
- for site in self.apt_caches.keys():
- for baseDir in self.apt_caches[site].keys():
- self.apt_caches[site][baseDir].cleanup()
- del self.apt_caches[site][baseDir]
- del self.apt_caches[site]
-
-class TestMirrorManager(unittest.TestCase):
- """Unit tests for the mirror manager."""
-
- timeout = 20
- pending_calls = []
- client = None
-
- def setUp(self):
- self.client = MirrorManager(FilePath('/tmp/.apt-p2p'), 300)
-
- def test_extractPath(self):
- """Test extracting the site and base directory from various mirrors."""
- site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org/debian/dists/unstable/Release')
- self.failUnless(site == "ftp.us.debian.org:80", "no match: %s" % site)
- self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
- self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)
-
- site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org:16999/debian/pool/d/dpkg/dpkg_1.2.1-1.tar.gz')
- self.failUnless(site == "ftp.us.debian.org:16999", "no match: %s" % site)
- self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
- self.failUnless(path == "/pool/d/dpkg/dpkg_1.2.1-1.tar.gz", "no match: %s" % path)
-
- site, baseDir, path = self.client.extractPath('http://debian.camrdale.org/dists/unstable/Release')
- self.failUnless(site == "debian.camrdale.org:80", "no match: %s" % site)
- self.failUnless(baseDir == "", "no match: %s" % baseDir)
- self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)
-
- def verifyHash(self, found_hash, path, true_hash):
- self.failUnless(found_hash.hexexpected() == true_hash,
- "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))
-
- def test_findHash(self):
- """Tests finding the hash of an index file, binary package, source package, and another index file."""
- # Find the largest index files that are for 'main'
- self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n')
- self.sourcesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').read().rstrip('\n')
-
- # Find the Release file corresponding to the found Packages file
- for f in os.walk('/var/lib/apt/lists').next()[2]:
- if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
- self.releaseFile = f
- break
-
- # Add all the found files to the mirror
- self.client.updatedFile('http://' + self.releaseFile.replace('_','/'),
- FilePath('/var/lib/apt/lists/' + self.releaseFile))
- self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
- self.packagesFile[self.packagesFile.find('_dists_')+1:].replace('_','/'),
- FilePath('/var/lib/apt/lists/' + self.packagesFile))
- self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
- self.sourcesFile[self.sourcesFile.find('_dists_')+1:].replace('_','/'),
- FilePath('/var/lib/apt/lists/' + self.sourcesFile))
-
- lastDefer = defer.Deferred()
-
- # Lookup a Packages.bz2 file
- idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
- '/var/lib/apt/lists/' + self.releaseFile +
- ' | grep -E " main/binary-i386/Packages.bz2$"'
- ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
- idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2'
-
- d = self.client.findHash(idx_path)
- d.addCallback(self.verifyHash, idx_path, idx_hash)
-
- # Lookup the binary 'dpkg' package
- pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
- '/var/lib/apt/lists/' + self.packagesFile +
- ' | grep -E "^SHA1:" | head -n 1' +
- ' | cut -d\ -f 2').read().rstrip('\n')
- pkg_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + \
- os.popen('grep -A 30 -E "^Package: dpkg$" ' +
- '/var/lib/apt/lists/' + self.packagesFile +
- ' | grep -E "^Filename:" | head -n 1' +
- ' | cut -d\ -f 2').read().rstrip('\n')
-
- d = self.client.findHash(pkg_path)
- d.addCallback(self.verifyHash, pkg_path, pkg_hash)
-
- # Lookup the source 'dpkg' package
- src_dir = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
- '/var/lib/apt/lists/' + self.sourcesFile +
- ' | grep -E "^Directory:" | head -n 1' +
- ' | cut -d\ -f 2').read().rstrip('\n')
- src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
- '/var/lib/apt/lists/' + self.sourcesFile +
- ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
- ' | cut -d\ -f 2').read().split('\n')[:-1]
- src_paths = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
- '/var/lib/apt/lists/' + self.sourcesFile +
- ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
- ' | cut -d\ -f 4').read().split('\n')[:-1]
-
- for i in range(len(src_hashes)):
- src_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + src_dir + '/' + src_paths[i]
- d = self.client.findHash(src_path)
- d.addCallback(self.verifyHash, src_path, src_hashes[i])
-
- # Lookup a Sources.bz2 file
- idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
- '/var/lib/apt/lists/' + self.releaseFile +
- ' | grep -E " main/source/Sources.bz2$"'
- ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
- idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/source/Sources.bz2'
-
- d = self.client.findHash(idx_path)
- d.addCallback(self.verifyHash, idx_path, idx_hash)
-
- d.addBoth(lastDefer.callback)
- return lastDefer
-
- def tearDown(self):
- for p in self.pending_calls:
- if p.active():
- p.cancel()
- self.client.cleanup()
- self.client = None
-
\ No newline at end of file
+++ /dev/null
-
-"""Manage a set of peers and the requests to them."""
-
-from random import choice
-from urlparse import urlparse, urlunparse
-from urllib import quote_plus
-
-from twisted.internet import reactor, defer
-from twisted.python import log
-from twisted.trial import unittest
-from twisted.web2 import stream as stream_mod
-from twisted.web2.http import splitHostPort
-
-from HTTPDownloader import Peer
-from util import uncompact
-
-class PeerManager:
- """Manage a set of peers and the requests to them.
-
- @type clients: C{dictionary}
- @ivar clients: the available peers that have been previously contacted
- """
-
- def __init__(self):
- """Initialize the instance."""
- self.clients = {}
-
- def get(self, hash, mirror, peers = [], method="GET", modtime=None):
- """Download from a list of peers or fallback to a mirror.
-
- @type hash: L{Hash.HashObject}
- @param hash: the hash object containing the expected hash for the file
- @param mirror: the URI of the file on the mirror
- @type peers: C{list} of C{string}
- @param peers: a list of the peer info where the file can be found
- (optional, defaults to downloading from the mirror)
- @type method: C{string}
- @param method: the HTTP method to use, 'GET' or 'HEAD'
- (optional, defaults to 'GET')
- @type modtime: C{int}
- @param modtime: the modification time to use for an 'If-Modified-Since'
- header, as seconds since the epoch
- (optional, defaults to not sending that header)
- """
- if peers:
- # Choose one of the peers at random
- compact_peer = choice(peers)
- peer = uncompact(compact_peer['c'])
- log.msg('Downloading from peer %r' % (peer, ))
- site = peer
- path = '/~/' + quote_plus(hash.expected())
- else:
- log.msg('Downloading (%s) from mirror %s' % (method, mirror))
- parsed = urlparse(mirror)
- assert parsed[0] == "http", "Only HTTP is supported, not '%s'" % parsed[0]
- site = splitHostPort(parsed[0], parsed[1])
- path = urlunparse(('', '') + parsed[2:])
-
- return self.getPeer(site, path, method, modtime)
-
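- # A minimal sketch of the fallback logic above (the URL is illustrative,
- # 'hash' is a previously created Hash.HashObject, and 'process_response'
- # is hypothetical). With no peers the request goes straight to the
- # mirror; otherwise a random peer is asked for the hash instead:
- #
- #   manager = PeerManager()
- #   d = manager.get(hash, 'http://ftp.us.debian.org/debian/dists/sid/Release')
- #   d.addCallback(process_response)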
- def getPeer(self, site, path, method="GET", modtime=None):
- """Create a new peer if necessary and forward the request to it.
-
- @type site: (C{string}, C{int})
- @param site: the IP address and port of the peer
- @type path: C{string}
- @param path: the path to the file on the peer
- @type method: C{string}
- @param method: the HTTP method to use, 'GET' or 'HEAD'
- (optional, defaults to 'GET')
- @type modtime: C{int}
- @param modtime: the modification time to use for an 'If-Modified-Since'
- header, as seconds since the epoch
- (optional, defaults to not sending that header)
- """
- if site not in self.clients:
- self.clients[site] = Peer(site[0], site[1])
- return self.clients[site].get(path, method, modtime)
-
- def close(self):
- """Close all the connections to peers."""
- for site in self.clients:
- self.clients[site].close()
- self.clients = {}
-
-class TestPeerManager(unittest.TestCase):
- """Unit tests for the PeerManager."""
-
- manager = None
- pending_calls = []
-
- def gotResp(self, resp, num, expect):
- self.failUnless(resp.code >= 200 and resp.code < 300, "Got a non-200 response: %r" % resp.code)
- if expect is not None:
- self.failUnless(resp.stream.length == expect, "Length was incorrect, got %r, expected %r" % (resp.stream.length, expect))
- def print_(n):
- pass
- def printdone(n):
- pass
- stream_mod.readStream(resp.stream, print_).addCallback(printdone)
-
- def test_download(self):
- """Tests a normal download."""
- self.manager = PeerManager()
- self.timeout = 10
-
- host = 'www.ietf.org'
- d = self.manager.get('', 'http://' + host + '/rfc/rfc0013.txt')
- d.addCallback(self.gotResp, 1, 1070)
- return d
-
- def test_head(self):
- """Tests a 'HEAD' request."""
- self.manager = PeerManager()
- self.timeout = 10
-
- host = 'www.ietf.org'
- d = self.manager.get('', 'http://' + host + '/rfc/rfc0013.txt', method = "HEAD")
- d.addCallback(self.gotResp, 1, 0)
- return d
-
- def test_multiple_downloads(self):
- """Tests multiple downloads with queueing and connection closing."""
- self.manager = PeerManager()
- self.timeout = 120
- lastDefer = defer.Deferred()
-
- def newRequest(host, path, num, expect, last=False):
- d = self.manager.get('', 'http://' + host + ':' + str(80) + path)
- d.addCallback(self.gotResp, num, expect)
- if last:
- d.addBoth(lastDefer.callback)
-
- newRequest('www.ietf.org', "/rfc/rfc0006.txt", 1, 1776)
- newRequest('www.ietf.org', "/rfc/rfc2362.txt", 2, 159833)
- newRequest('www.google.ca', "/", 3, None)
- self.pending_calls.append(reactor.callLater(1, newRequest, 'www.sfu.ca', '/', 4, None))
- self.pending_calls.append(reactor.callLater(10, newRequest, 'www.ietf.org', '/rfc/rfc0048.txt', 5, 41696))
- self.pending_calls.append(reactor.callLater(30, newRequest, 'www.ietf.org', '/rfc/rfc0022.txt', 6, 4606))
- self.pending_calls.append(reactor.callLater(31, newRequest, 'www.sfu.ca', '/studentcentral/index.html', 7, None))
- self.pending_calls.append(reactor.callLater(32, newRequest, 'www.ietf.org', '/rfc/rfc0014.txt', 8, 27))
- self.pending_calls.append(reactor.callLater(32, newRequest, 'www.ietf.org', '/rfc/rfc0001.txt', 9, 21088))
- self.pending_calls.append(reactor.callLater(62, newRequest, 'www.google.ca', '/intl/en/options/', 0, None, True))
- return lastDefer
-
- def tearDown(self):
- for p in self.pending_calls:
- if p.active():
- p.cancel()
- self.pending_calls = []
- if self.manager:
- self.manager.close()
- self.manager = None
+++ /dev/null
-
-"""The main apt-p2p modules.
-
-To run apt-p2p, you probably want to do something like::
-
- from apt_p2p.apt_p2p import AptP2P
- myapp = AptP2P(myDHT)
-
-where myDHT is a DHT that implements interfaces.IDHT.
-
-Diagram of the interaction between the given modules::
-
- +---------------+ +-----------------------------------+ +-------------
- | AptP2P | | DHT | | Internet
- | |--->|join DHT|----|--\
- | |--->|loadConfig | | | Another
- | |--->|getValue | | | Node
- | |--->|storeValue DHT|<---|--/
- | |--->|leave | |
- | | +-----------------------------------+ |
- | | +-------------+ +----------------+ |
- | | | PeerManager | | HTTPDownloader*| |
- | |--->|get |--->|get HTTP|----|---> Mirror
- | | | |--->|getRange | |
- | |--->|close |--->|close HTTP|----|--\
- | | +-------------+ +----------------+ | | Another
- | | +-----------------------------------+ | | Peer
- | | | HTTPServer HTTP|<---|--/
- | |--->|getHTTPFactory | +-------------
- |check_freshness|<---| | +-------------
- | get_resp|<---| HTTP|<---|HTTP Request
- | | +-----------------------------------+ |
- | | +---------------+ +--------------+ | Local Net
- | | | CacheManager | | ProxyFile- | | (apt)
- | |--->|scanDirectories| | Stream* | |
- | |--->|save_file |--->|__init__ HTTP|--->|HTTP Response
- | |--->|save_error | | | +-------------
- | | | | | | +-------------
- |new_cached_file|<---| | | file|--->|write file
- | | +---------------+ +--------------+ |
- | | +---------------+ +--------------+ | Filesystem
- | | | MirrorManager | | AptPackages* | |
- | |--->|updatedFile |--->|file_updated | |
- | |--->|findHash |--->|findHash file|<---|read file
- +---------------+ +---------------+ +--------------+ +-------------
-
-"""
+++ /dev/null
-
-"""The main program code.
-
-@var DHT_PIECES: the maximum number of pieces to store with our contact info
- in the DHT
-@var TORRENT_PIECES: the maximum number of pieces to store as a separate entry
- in the DHT
-@var download_dir: the name of the directory to use for downloaded files
-
-"""
-
-from binascii import b2a_hex
-from urlparse import urlunparse
-import os, re, sha
-
-from twisted.internet import defer, reactor
-from twisted.web2 import server, http, http_headers, static
-from twisted.python import log, failure
-from twisted.python.filepath import FilePath
-
-from apt_p2p_conf import config
-from PeerManager import PeerManager
-from HTTPServer import TopLevel
-from MirrorManager import MirrorManager
-from CacheManager import CacheManager
-from Hash import HashObject
-from db import DB
-from util import findMyIPAddr, compact
-
-DHT_PIECES = 4
-TORRENT_PIECES = 70
-
-download_dir = 'cache'
-
-class AptP2P:
- """The main code object that does all of the work.
-
- Contains all of the sub-components that do all the low-level work, and
- coordinates communication between them.
-
- @type cache_dir: L{twisted.python.filepath.FilePath}
- @ivar cache_dir: the directory to use for storing all files
- @type db: L{db.DB}
- @ivar db: the database to use for tracking files and hashes
- @type dht: L{interfaces.IDHT}
- @ivar dht: the DHT instance to use
- @type http_server: L{HTTPServer.TopLevel}
- @ivar http_server: the web server that will handle all requests from apt
- and from other peers
- @type peers: L{PeerManager.PeerManager}
- @ivar peers: the manager of all downloads from mirrors and other peers
- @type mirrors: L{MirrorManager.MirrorManager}
- @ivar mirrors: the manager of downloaded information about mirrors which
- can be queried to get hashes from file names
- @type cache: L{CacheManager.CacheManager}
- @ivar cache: the manager of all downloaded files
- @type my_contact: C{string}
- @ivar my_contact: the 6-byte compact peer representation of this peer's
- download information (IP address and port)
- """
-
- def __init__(self, dht):
- """Initialize all the sub-components.
-
- @type dht: L{interfaces.IDHT}
- @param dht: the DHT instance to use
- """
- log.msg('Initializing the main apt_p2p application')
- self.cache_dir = FilePath(config.get('DEFAULT', 'cache_dir'))
- if not self.cache_dir.child(download_dir).exists():
- self.cache_dir.child(download_dir).makedirs()
- self.db = DB(self.cache_dir.child('apt-p2p.db'))
- self.dht = dht
- self.dht.loadConfig(config, config.get('DEFAULT', 'DHT'))
- self.dht.join().addCallbacks(self.joinComplete, self.joinError)
- self.http_server = TopLevel(self.cache_dir.child(download_dir), self.db, self)
- self.getHTTPFactory = self.http_server.getHTTPFactory
- self.peers = PeerManager()
- self.mirrors = MirrorManager(self.cache_dir, config.gettime('DEFAULT', 'UNLOAD_PACKAGES_CACHE'))
- other_dirs = [FilePath(f) for f in config.getstringlist('DEFAULT', 'OTHER_DIRS')]
- self.cache = CacheManager(self.cache_dir.child(download_dir), self.db, other_dirs, self)
- self.my_contact = None
-
- #{ DHT maintenance
- def joinComplete(self, result):
- """Complete the DHT join process and determine our download information.
-
- Called by the DHT when the join has been completed with information
- on the external IP address and port of this peer.
- """
- my_addr = findMyIPAddr(result,
- config.getint(config.get('DEFAULT', 'DHT'), 'PORT'),
- config.getboolean('DEFAULT', 'LOCAL_OK'))
- if not my_addr:
- raise RuntimeError, "IP address for this machine could not be found"
- self.my_contact = compact(my_addr, config.getint('DEFAULT', 'PORT'))
- self.cache.scanDirectories()
- reactor.callLater(60, self.refreshFiles)
-
- def joinError(self, failure):
- """Joining the DHT has failed."""
- log.msg("joining DHT failed miserably")
- log.err(failure)
- raise RuntimeError, "IP address for this machine could not be found"
-
- def refreshFiles(self):
- """Refresh any files in the DHT that are about to expire."""
- expireAfter = config.gettime('DEFAULT', 'KEY_REFRESH')
- hashes = self.db.expiredHashes(expireAfter)
- if len(hashes.keys()) > 0:
- log.msg('Refreshing the keys of %d DHT values' % len(hashes.keys()))
- self._refreshFiles(None, hashes)
-
- def _refreshFiles(self, result, hashes):
- if result is not None:
- log.msg('Storage resulted in: %r' % result)
-
- if hashes:
- raw_hash = hashes.keys()[0]
- self.db.refreshHash(raw_hash)
- hash = HashObject(raw_hash, pieces = hashes[raw_hash]['pieces'])
- del hashes[raw_hash]
- storeDefer = self.store(hash)
- storeDefer.addBoth(self._refreshFiles, hashes)
- else:
- reactor.callLater(60, self.refreshFiles)
-
- #{ Main workflow
- def check_freshness(self, req, url, modtime, resp):
- """Send a HEAD to the mirror to check if the response from the cache is still valid.
-
- @type req: L{twisted.web2.http.Request}
- @param req: the initial request sent to the HTTP server by apt
- @param url: the URI of the actual mirror request
- @type modtime: C{int}
- @param modtime: the modified time of the cached file (seconds since epoch)
- @type resp: L{twisted.web2.http.Response}
- @param resp: the response from the cache to be sent to apt
- @rtype: L{twisted.internet.defer.Deferred}
- @return: a deferred that will be called back with the correct response
- """
- log.msg('Checking if %s is still fresh' % url)
- d = self.peers.get('', url, method = "HEAD", modtime = modtime)
- d.addCallback(self.check_freshness_done, req, url, resp)
- return d
-
- def check_freshness_done(self, resp, req, url, orig_resp):
- """Process the returned response from the mirror.
-
- @type resp: L{twisted.web2.http.Response}
- @param resp: the response from the mirror to the HEAD request
- @type req: L{twisted.web2.http.Request}
- @param req: the initial request sent to the HTTP server by apt
- @param url: the URI of the actual mirror request
- @type orig_resp: L{twisted.web2.http.Response}
- @param orig_resp: the response from the cache to be sent to apt
- """
- if resp.code == 304:
- log.msg('Still fresh, returning: %s' % url)
- return orig_resp
- else:
- log.msg('Stale, need to redownload: %s' % url)
- return self.get_resp(req, url)
-
- def get_resp(self, req, url):
- """Lookup a hash for the file in the local mirror info.
-
- Starts the process of getting a response to an uncached apt request.
-
- @type req: L{twisted.web2.http.Request}
- @param req: the initial request sent to the HTTP server by apt
- @param url: the URI of the actual mirror request
- @rtype: L{twisted.internet.defer.Deferred}
- @return: a deferred that will be called back with the response
- """
- d = defer.Deferred()
-
- log.msg('Trying to find hash for %s' % url)
- findDefer = self.mirrors.findHash(url)
-
- findDefer.addCallbacks(self.findHash_done, self.findHash_error,
- callbackArgs=(req, url, d), errbackArgs=(req, url, d))
- findDefer.addErrback(log.err)
- return d
-
- def findHash_error(self, failure, req, url, d):
- """Process the error in hash lookup by returning an empty L{HashObject}."""
- log.err(failure)
- self.findHash_done(HashObject(), req, url, d)
-
- def findHash_done(self, hash, req, url, d):
- """Use the returned hash to lookup the file in the cache.
-
- If the hash was not found, the workflow skips down to download from
- the mirror (L{lookupHash_done}).
-
- @type hash: L{Hash.HashObject}
- @param hash: the hash object containing the expected hash for the file
- """
- if hash.expected() is None:
- log.msg('Hash for %s was not found' % url)
- self.lookupHash_done([], hash, url, d)
- else:
- log.msg('Found hash %s for %s' % (hash.hexexpected(), url))
-
- # Lookup hash in cache
- locations = self.db.lookupHash(hash.expected(), filesOnly = True)
- self.getCachedFile(hash, req, url, d, locations)
-
- def getCachedFile(self, hash, req, url, d, locations):
- """Try to return the file from the cache, otherwise move on to a DHT lookup.
-
- @type locations: C{list} of C{dictionary}
- @param locations: the files in the cache that match the hash,
- the dictionary contains a key 'path' whose value is a
- L{twisted.python.filepath.FilePath} object for the file.
- """
- if not locations:
- log.msg('Failed to return file from cache: %s' % url)
- self.lookupHash(hash, url, d)
- return
-
- # Get the first possible location from the list
- file = locations.pop(0)['path']
- log.msg('Returning cached file: %s' % file.path)
-
- # Get its response
- resp = static.File(file.path).renderHTTP(req)
- if isinstance(resp, defer.Deferred):
- resp.addBoth(self._getCachedFile, hash, req, url, d, locations)
- else:
- self._getCachedFile(resp, hash, req, url, d, locations)
-
- def _getCachedFile(self, resp, hash, req, url, d, locations):
- """Check the returned response to be sure it is valid."""
- if isinstance(resp, failure.Failure):
- log.msg('Got error trying to get cached file')
- log.err()
- # Try the next possible location
- self.getCachedFile(hash, req, url, d, locations)
- return
-
- log.msg('Cached response: %r' % resp)
-
- if resp.code >= 200 and resp.code < 400:
- d.callback(resp)
- else:
- # Try the next possible location
- self.getCachedFile(hash, req, url, d, locations)
-
- def lookupHash(self, hash, url, d):
- """Lookup the hash in the DHT."""
- log.msg('Looking up hash in DHT for file: %s' % url)
- key = hash.expected()
- lookupDefer = self.dht.getValue(key)
- lookupDefer.addCallback(self.lookupHash_done, hash, url, d)
-
- def lookupHash_done(self, values, hash, url, d):
- """Start the download of the file.
-
- The download will be from peers if the DHT lookup succeeded, or
- from the mirror otherwise.
-
- @type values: C{list} of C{dictionary}
- @param values: the returned values from the DHT containing peer
- download information
- """
- if not values:
- log.msg('Peers for %s were not found' % url)
- getDefer = self.peers.get(hash, url)
- getDefer.addCallback(self.cache.save_file, hash, url)
- getDefer.addErrback(self.cache.save_error, url)
- getDefer.addCallbacks(d.callback, d.errback)
- else:
- log.msg('Found peers for %s: %r' % (url, values))
- # Download from the found peers
- getDefer = self.peers.get(hash, url, values)
- getDefer.addCallback(self.check_response, hash, url)
- getDefer.addCallback(self.cache.save_file, hash, url)
- getDefer.addErrback(self.cache.save_error, url)
- getDefer.addCallbacks(d.callback, d.errback)
-
- def check_response(self, response, hash, url):
- """Check the response from peers, and download from the mirror if it is not."""
- if response.code < 200 or response.code >= 300:
- log.msg('Download from peers failed, going to direct download: %s' % url)
- getDefer = self.peers.get(hash, url)
- return getDefer
- return response
-
- def new_cached_file(self, file_path, hash, new_hash, url = None, forceDHT = False):
- """Add a newly cached file to the mirror info and/or the DHT.
-
- If the file was downloaded, set url to the path it was downloaded for.
- Doesn't add a file to the DHT unless a hash was found for it
- (but does add it anyway if forceDHT is True).
-
- @type file_path: L{twisted.python.filepath.FilePath}
- @param file_path: the location of the file in the local cache
- @type hash: L{Hash.HashObject}
- @param hash: the original (expected) hash object containing also the
- hash of the downloaded file
- @type new_hash: C{boolean}
- @param new_hash: whether the hash was new to this peer, and so should
- be added to the DHT
- @type url: C{string}
- @param url: the URI of the location of the file in the mirror
- (optional, defaults to not adding the file to the mirror info)
- @type forceDHT: C{boolean}
- @param forceDHT: whether to force addition of the file to the DHT
- even if the hash was not found in a mirror
- (optional, defaults to False)
- """
- if url:
- self.mirrors.updatedFile(url, file_path)
-
- if self.my_contact and hash and new_hash and (hash.expected() is not None or forceDHT):
- return self.store(hash)
- return None
-
- def store(self, hash):
- """Add a key/value pair for the file to the DHT.
-
- Sets the key and value from the hash information, and tries to add
- it to the DHT.
- """
- key = hash.digest()
- value = {'c': self.my_contact}
- pieces = hash.pieceDigests()
-
- # Determine how to store any piece data
- if len(pieces) <= 1:
- pass
- elif len(pieces) <= DHT_PIECES:
- # Short enough to be stored with our peer contact info
- value['t'] = {'t': ''.join(pieces)}
- elif len(pieces) <= TORRENT_PIECES:
- # Short enough to be stored in a separate key in the DHT
- s = sha.new(''.join(pieces))
- value['h'] = s.digest()
- else:
- # Too long, must be served up by our peer HTTP server
- s = sha.new(''.join(pieces))
- value['l'] = s.digest()
-
- storeDefer = self.dht.storeValue(key, value)
- storeDefer.addCallback(self.store_done, hash)
- return storeDefer
-
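- # A sketch of the resulting DHT values for the four cases above, where
- # 'contact' is this peer's compact contact info and n = len(pieces):
- #   n <= 1:               {'c': contact}
- #   n <= DHT_PIECES:      {'c': contact, 't': {'t': joined piece hashes}}
- #   n <= TORRENT_PIECES:  {'c': contact, 'h': sha1(joined piece hashes)}
- #   larger n:             {'c': contact, 'l': sha1(joined piece hashes)}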
- def store_done(self, result, hash):
- """Add a key/value pair for the pieces of the file to the DHT (if necessary)."""
- log.msg('Added %s to the DHT: %r' % (hash.hexdigest(), result))
- pieces = hash.pieceDigests()
- if len(pieces) > DHT_PIECES and len(pieces) <= TORRENT_PIECES:
- # Add the piece data key and value to the DHT
- s = sha.new(''.join(pieces))
- key = s.digest()
- value = {'t': ''.join(pieces)}
-
- storeDefer = self.dht.storeValue(key, value)
- storeDefer.addCallback(self.store_torrent_done, key)
- return storeDefer
- return result
-
- def store_torrent_done(self, result, key):
- """Adding the file to the DHT is complete, and so is the workflow."""
- log.msg('Added torrent string %s to the DHT: %r' % (b2a_hex(key), result))
- return result
-
\ No newline at end of file
+++ /dev/null
-
-"""Loading of configuration files and parameters.
-
-@type version: L{twisted.python.versions.Version}
-@var version: the version of this program
-@type DEFAULT_CONFIG_FILES: C{list} of C{string}
-@var DEFAULT_CONFIG_FILES: the default config files to load (in order)
-@var DEFAULTS: the default config parameter values for the main program
-@var DHT_DEFAULTS: the default config parameter values for the default DHT
-
-"""
-
-import os, sys
-from ConfigParser import SafeConfigParser
-
-from twisted.python import log, versions
-
-class ConfigError(Exception):
- """Errors that occur in the loading of configuration variables."""
- def __init__(self, message):
- self.message = message
- def __str__(self):
- return repr(self.message)
-
-version = versions.Version('apt-p2p', 0, 0, 0)
-
-# Set the home parameter
-home = os.path.expandvars('${HOME}')
-if home == '${HOME}' or not os.path.isdir(home):
- home = os.path.expanduser('~')
- if not os.path.isdir(home):
- home = os.path.abspath(os.path.dirname(sys.argv[0]))
-
-DEFAULT_CONFIG_FILES=['/etc/apt-p2p/apt-p2p.conf',
- home + '/.apt-p2p/apt-p2p.conf']
-
-DEFAULTS = {
-
- # Port to listen on for all requests (TCP and UDP)
- 'PORT': '9977',
-
- # Directory to store the downloaded files in
- 'CACHE_DIR': home + '/.apt-p2p/cache',
-
- # Other directories containing packages to share with others
- # WARNING: all files in these directories will be hashed and available
- # for everybody to download
-    'OTHER_DIRS': '',
-
- # User name to try and run as
- 'USERNAME': '',
-
-    # Whether it's OK to use an IP address from a known local/private range
- 'LOCAL_OK': 'no',
-
- # Unload the packages cache after an interval of inactivity this long.
- # The packages cache uses a lot of memory, and only takes a few seconds
- # to reload when a new request arrives.
- 'UNLOAD_PACKAGES_CACHE': '5m',
-
- # Refresh the DHT keys after this much time has passed.
- # This should be a time slightly less than the DHT's KEY_EXPIRE value.
- 'KEY_REFRESH': '57m',
-
- # Which DHT implementation to use.
-    # It must be possible to do "from <DHT>.DHT import DHT" to get a class that
- # implements the IDHT interface.
- 'DHT': 'apt_p2p_Khashmir',
-
- # Whether to only run the DHT (for providing only a bootstrap node)
- 'DHT-ONLY': 'no',
-}
-
-DHT_DEFAULTS = {
- # bootstrap nodes to contact to join the DHT
- 'BOOTSTRAP': """www.camrdale.org:9977
- steveholt.hopto.org:9976""",
-
- # whether this node is a bootstrap node
- 'BOOTSTRAP_NODE': "no",
-
- # Kademlia "K" constant, this should be an even number
- 'K': '8',
-
- # SHA1 is 160 bits long
- 'HASH_LENGTH': '160',
-
-    # interval between saving the running state
-    'CHECKPOINT_INTERVAL': '5m', # five minutes
-
- ### SEARCHING/STORING
-    # concurrent number of calls per find node/value request
- 'CONCURRENT_REQS': '4',
-
- # how many hosts to post to
- 'STORE_REDUNDANCY': '3',
-
- # How many values to attempt to retrieve from the DHT.
- # Setting this to 0 will try and get all values (which could take a while if
- # a lot of nodes have values). Setting it negative will try to get that
- # number of results from only the closest STORE_REDUNDANCY nodes to the hash.
- # The default is a large negative number so all values from the closest
- # STORE_REDUNDANCY nodes will be retrieved.
- 'RETRIEVE_VALUES': '-10000',
-
- ### ROUTING TABLE STUFF
- # how many times in a row a node can fail to respond before it's booted from the routing table
- 'MAX_FAILURES': '3',
-
- # never ping a node more often than this
- 'MIN_PING_INTERVAL': '15m', # fifteen minutes
-
- # refresh buckets that haven't been touched in this long
- 'BUCKET_STALENESS': '1h', # one hour
-
- # expire entries older than this
- 'KEY_EXPIRE': '1h', # 60 minutes
-
- # whether to spew info about the requests/responses in the protocol
- 'SPEW': 'yes',
-}
-
-class AptP2PConfigParser(SafeConfigParser):
- """Adds 'gettime' and 'getstringlist' to ConfigParser objects.
-
- @ivar time_multipliers: the 'gettime' suffixes and the multipliers needed
- to convert them to seconds
- """
-
- time_multipliers={
- 's': 1, #seconds
- 'm': 60, #minutes
- 'h': 3600, #hours
- 'd': 86400,#days
- }
-
-    def gettime(self, section, option):
-        """Read the config parameter as a time value."""
-        mult = 1
-        value = self.get(section, option)
-        if len(value) == 0:
-            raise ConfigError("Configuration parse error, empty value: [%s] %s" % (section, option))
-        suffix = value[-1].lower()
-        if suffix in self.time_multipliers:
-            mult = self.time_multipliers[suffix]
-            value = value[:-1]
-        return int(value)*mult
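-
-    # For example (sketch): a KEY_REFRESH value of '57m' parses as
-    # 57 * 60 = 3420 seconds, while a bare numeric value like '90' is
-    # returned unchanged as 90 seconds.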
-
- def getstring(self, section, option):
- """Read the config parameter as a string."""
- return self.get(section,option)
-
- def getstringlist(self, section, option):
- """Read the multi-line config parameter as a list of strings."""
- return self.get(section,option).split()
-
- def optionxform(self, option):
- """Use all uppercase in the config parameters names."""
- return option.upper()
-
-# Initialize the default config parameters
-config = AptP2PConfigParser(DEFAULTS)
-config.add_section(config.get('DEFAULT', 'DHT'))
-for k in DHT_DEFAULTS:
- config.set(config.get('DEFAULT', 'DHT'), k, DHT_DEFAULTS[k])
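-
-# Illustrative usage sketch (the config files listed above may not exist,
-# in which case read() silently skips them):
-#
-#   config.read(DEFAULT_CONFIG_FILES)
-#   cache = config.getstring('DEFAULT', 'CACHE_DIR')
-#   refresh = config.gettime('DEFAULT', 'KEY_REFRESH')    # in seconds
-#   dht_section = config.get('DEFAULT', 'DHT')
-#   bootstrap = config.getstringlist(dht_section, 'BOOTSTRAP')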
+++ /dev/null
-
-"""An sqlite database for storing persistent files and hashes."""
-
-from datetime import datetime, timedelta
-from pysqlite2 import dbapi2 as sqlite
-from binascii import a2b_base64, b2a_base64
-from time import sleep
-import os, sha
-
-from twisted.python.filepath import FilePath
-from twisted.trial import unittest
-
-assert sqlite.version_info >= (2, 1)
-
-class DBExcept(Exception):
- """An error occurred in accessing the database."""
- pass
-
-class khash(str):
- """Dummy class to convert all hashes to base64 for storing in the DB."""
-
-# Initialize the database to work with 'khash' objects (binary strings)
-sqlite.register_adapter(khash, b2a_base64)
-sqlite.register_converter("KHASH", a2b_base64)
-sqlite.register_converter("khash", a2b_base64)
-sqlite.enable_callback_tracebacks(True)
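-
-# Illustrative sketch: because of the adapter/converter pair above, binary
-# digests wrapped in khash() are base64-encoded on their way into the DB,
-# and columns declared as KHASH are decoded back to binary strings on the
-# way out ('digest' here is a hypothetical binary SHA1 digest):
-#
-#   c.execute("SELECT pieces FROM hashes WHERE hash = ?", (khash(digest), ))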
-
-class DB:
- """An sqlite database for storing persistent files and hashes.
-
- @type db: L{twisted.python.filepath.FilePath}
- @ivar db: the database file to use
- @type conn: L{pysqlite2.dbapi2.Connection}
- @ivar conn: an open connection to the sqlite database
- """
-
- def __init__(self, db):
- """Load or create the database file.
-
- @type db: L{twisted.python.filepath.FilePath}
- @param db: the database file to use
- """
- self.db = db
- self.db.restat(False)
- if self.db.exists():
- self._loadDB()
- else:
- self._createNewDB()
- self.conn.text_factory = str
- self.conn.row_factory = sqlite.Row
-
- def _loadDB(self):
- """Open a new connection to the existing database file"""
- try:
- self.conn = sqlite.connect(database=self.db.path, detect_types=sqlite.PARSE_DECLTYPES)
- except:
- import traceback
-            raise DBExcept, "Couldn't open DB: " + traceback.format_exc()
-
- def _createNewDB(self):
- """Open a connection to a new database and create the necessary tables."""
- if not self.db.parent().exists():
- self.db.parent().makedirs()
- self.conn = sqlite.connect(database=self.db.path, detect_types=sqlite.PARSE_DECLTYPES)
- c = self.conn.cursor()
- c.execute("CREATE TABLE files (path TEXT PRIMARY KEY UNIQUE, hashID INTEGER, " +
- "size NUMBER, mtime NUMBER)")
- c.execute("CREATE TABLE hashes (hashID INTEGER PRIMARY KEY AUTOINCREMENT, " +
- "hash KHASH UNIQUE, pieces KHASH, " +
- "piecehash KHASH, refreshed TIMESTAMP)")
- c.execute("CREATE INDEX hashes_refreshed ON hashes(refreshed)")
- c.execute("CREATE INDEX hashes_piecehash ON hashes(piecehash)")
- c.close()
- self.conn.commit()
-
- def _removeChanged(self, file, row):
- """If the file has changed or is missing, remove it from the DB.
-
- @type file: L{twisted.python.filepath.FilePath}
- @param file: the file to check
- @type row: C{dictionary}-like object
- @param row: contains the expected 'size' and 'mtime' of the file
- @rtype: C{boolean}
- @return: True if the file is unchanged, False if it is changed,
- and None if it is missing
- """
- res = None
- if row:
- file.restat(False)
- if file.exists():
- # Compare the current with the expected file properties
- res = (row['size'] == file.getsize() and row['mtime'] == file.getmtime())
- if not res:
- # Remove the file from the database
- c = self.conn.cursor()
- c.execute("DELETE FROM files WHERE path = ?", (file.path, ))
- self.conn.commit()
- c.close()
- return res
-
- def storeFile(self, file, hash, pieces = ''):
- """Store or update a file in the database.
-
- @type file: L{twisted.python.filepath.FilePath}
- @param file: the file to check
- @type hash: C{string}
- @param hash: the hash of the file
- @type pieces: C{string}
- @param pieces: the concatenated list of the hashes of the pieces of
- the file (optional, defaults to the empty string)
- @return: True if the hash was not in the database before
- (so it needs to be added to the DHT)
- """
- # Hash the pieces to get the piecehash
- piecehash = ''
- if pieces:
-            piecehash = sha.new(pieces).digest()
-
- # Check the database for the hash
- c = self.conn.cursor()
- c.execute("SELECT hashID, piecehash FROM hashes WHERE hash = ?", (khash(hash), ))
- row = c.fetchone()
- if row:
- assert piecehash == row['piecehash']
- new_hash = False
- hashID = row['hashID']
- else:
-            # Add the new hash to the database
- c.execute("INSERT OR REPLACE INTO hashes (hash, pieces, piecehash, refreshed) VALUES (?, ?, ?, ?)",
- (khash(hash), khash(pieces), khash(piecehash), datetime.now()))
- self.conn.commit()
- new_hash = True
- hashID = c.lastrowid
-
- # Add the file to the database
- file.restat()
- c.execute("INSERT OR REPLACE INTO files (path, hashID, size, mtime) VALUES (?, ?, ?, ?)",
- (file.path, hashID, file.getsize(), file.getmtime()))
- self.conn.commit()
- c.close()
-
- return new_hash
-
- def getFile(self, file):
- """Get a file from the database.
-
- If it has changed or is missing, it is removed from the database.
-
- @type file: L{twisted.python.filepath.FilePath}
- @param file: the file to check
- @return: dictionary of info for the file, False if changed, or
- None if not in database or missing
- """
- c = self.conn.cursor()
- c.execute("SELECT hash, size, mtime, pieces FROM files JOIN hashes USING (hashID) WHERE path = ?", (file.path, ))
- row = c.fetchone()
- res = None
- if row:
- res = self._removeChanged(file, row)
- if res:
- res = {}
- res['hash'] = row['hash']
- res['size'] = row['size']
- res['pieces'] = row['pieces']
- c.close()
- return res
-
- def lookupHash(self, hash, filesOnly = False):
- """Find a file by hash in the database.
-
- If any found files have changed or are missing, they are removed
- from the database. If filesOnly is False then it will also look for
- piece string hashes if no files can be found.
-
- @return: list of dictionaries of info for the found files
- """
- # Try to find the hash in the files table
- c = self.conn.cursor()
- c.execute("SELECT path, size, mtime, refreshed, pieces FROM files JOIN hashes USING (hashID) WHERE hash = ?", (khash(hash), ))
- row = c.fetchone()
- files = []
- while row:
- # Save the file to the list of found files
- file = FilePath(row['path'])
- res = self._removeChanged(file, row)
- if res:
- res = {}
- res['path'] = file
- res['size'] = row['size']
- res['refreshed'] = row['refreshed']
- res['pieces'] = row['pieces']
- files.append(res)
- row = c.fetchone()
-
- if not filesOnly and not files:
- # No files were found, so check the piecehashes as well
- c.execute("SELECT refreshed, pieces, piecehash FROM hashes WHERE piecehash = ?", (khash(hash), ))
- row = c.fetchone()
- if row:
- res = {}
- res['refreshed'] = row['refreshed']
- res['pieces'] = row['pieces']
- files.append(res)
-
- c.close()
- return files
-
- def isUnchanged(self, file):
- """Check if a file in the file system has changed.
-
- If it has changed, it is removed from the database.
-
- @return: True if unchanged, False if changed, None if not in database
- """
- c = self.conn.cursor()
- c.execute("SELECT size, mtime FROM files WHERE path = ?", (file.path, ))
- row = c.fetchone()
- return self._removeChanged(file, row)
-
- def refreshHash(self, hash):
- """Refresh the publishing time of a hash."""
- c = self.conn.cursor()
- c.execute("UPDATE hashes SET refreshed = ? WHERE hash = ?", (datetime.now(), khash(hash)))
- c.close()
-
- def expiredHashes(self, expireAfter):
- """Find files that need refreshing after expireAfter seconds.
-
- For each hash that needs refreshing, finds all the files with that hash.
- If the file has changed or is missing, it is removed from the table.
-
-        @return: dictionary with the hashes as keys, the values are
-            dictionaries of info ('hashID', 'hash', 'pieces') for each hash
- """
- t = datetime.now() - timedelta(seconds=expireAfter)
-
- # Find all the hashes that need refreshing
- c = self.conn.cursor()
- c.execute("SELECT hashID, hash, pieces FROM hashes WHERE refreshed < ?", (t, ))
- row = c.fetchone()
- expired = {}
- while row:
- res = expired.setdefault(row['hash'], {})
- res['hashID'] = row['hashID']
- res['hash'] = row['hash']
- res['pieces'] = row['pieces']
- row = c.fetchone()
-
- # Make sure there are still valid files for each hash
- for hash in expired.values():
- valid = False
- c.execute("SELECT path, size, mtime FROM files WHERE hashID = ?", (hash['hashID'], ))
- row = c.fetchone()
- while row:
- res = self._removeChanged(FilePath(row['path']), row)
- if res:
- valid = True
- row = c.fetchone()
- if not valid:
- # Remove hashes for which no files are still available
- del expired[hash['hash']]
- c.execute("DELETE FROM hashes WHERE hashID = ?", (hash['hashID'], ))
-
- self.conn.commit()
- c.close()
-
- return expired
-
- def removeUntrackedFiles(self, dirs):
- """Remove files that are no longer tracked by the program.
-
- @type dirs: C{list} of L{twisted.python.filepath.FilePath}
- @param dirs: a list of the directories that we are tracking
- @return: list of files that were removed
- """
- assert len(dirs) >= 1
-
- # Create a list of globs and an SQL statement for the directories
- newdirs = []
- sql = "WHERE"
- for dir in dirs:
- newdirs.append(dir.child('*').path)
- sql += " path NOT GLOB ? AND"
- sql = sql[:-4]
-
- # Get a listing of all the files that will be removed
- c = self.conn.cursor()
- c.execute("SELECT path FROM files " + sql, newdirs)
- row = c.fetchone()
- removed = []
- while row:
- removed.append(FilePath(row['path']))
- row = c.fetchone()
-
- # Delete all the removed files from the database
- if removed:
- c.execute("DELETE FROM files " + sql, newdirs)
- self.conn.commit()
-
- return removed
-
- def close(self):
- """Close the database connection."""
- self.conn.close()
-
-class TestDB(unittest.TestCase):
- """Tests for the khashmir database."""
-
- timeout = 5
- db = FilePath('/tmp/khashmir.db')
- hash = '\xca\xec\xb8\x0c\x00\xe7\x07\xf8~])\x8f\x9d\xe5_B\xff\x1a\xc4!'
- directory = FilePath('/tmp/apt-p2p/')
- file = FilePath('/tmp/apt-p2p/khashmir.test')
- testfile = 'tmp/khashmir.test'
- dirs = [FilePath('/tmp/apt-p2p/top1'),
- FilePath('/tmp/apt-p2p/top2/sub1'),
- FilePath('/tmp/apt-p2p/top2/sub2/')]
-
- def setUp(self):
- if not self.file.parent().exists():
- self.file.parent().makedirs()
- self.file.setContent('fgfhds')
- self.file.touch()
- self.store = DB(self.db)
- self.store.storeFile(self.file, self.hash)
-
- def test_openExistingDB(self):
- """Tests opening an existing database."""
- self.store.close()
- self.store = None
- sleep(1)
- self.store = DB(self.db)
- res = self.store.isUnchanged(self.file)
- self.failUnless(res)
-
- def test_getFile(self):
- """Tests retrieving a file from the database."""
- res = self.store.getFile(self.file)
- self.failUnless(res)
- self.failUnlessEqual(res['hash'], self.hash)
-
- def test_lookupHash(self):
- """Tests looking up a hash in the database."""
- res = self.store.lookupHash(self.hash)
- self.failUnless(res)
- self.failUnlessEqual(len(res), 1)
- self.failUnlessEqual(res[0]['path'].path, self.file.path)
-
- def test_isUnchanged(self):
- """Tests checking if a file in the database is unchanged."""
- res = self.store.isUnchanged(self.file)
- self.failUnless(res)
- sleep(2)
- self.file.touch()
- res = self.store.isUnchanged(self.file)
- self.failUnless(res == False)
- res = self.store.isUnchanged(self.file)
- self.failUnless(res is None)
-
- def test_expiry(self):
- """Tests retrieving the files from the database that have expired."""
- res = self.store.expiredHashes(1)
- self.failUnlessEqual(len(res.keys()), 0)
- sleep(2)
- res = self.store.expiredHashes(1)
- self.failUnlessEqual(len(res.keys()), 1)
- self.failUnlessEqual(res.keys()[0], self.hash)
- self.store.refreshHash(self.hash)
- res = self.store.expiredHashes(1)
- self.failUnlessEqual(len(res.keys()), 0)
-
- def build_dirs(self):
- for dir in self.dirs:
- file = dir.preauthChild(self.testfile)
- if not file.parent().exists():
- file.parent().makedirs()
- file.setContent(file.path)
- file.touch()
- self.store.storeFile(file, self.hash)
-
- def test_multipleHashes(self):
- """Tests looking up a hash with multiple files in the database."""
- self.build_dirs()
- res = self.store.expiredHashes(1)
- self.failUnlessEqual(len(res.keys()), 0)
- res = self.store.lookupHash(self.hash)
- self.failUnless(res)
- self.failUnlessEqual(len(res), 4)
- self.failUnlessEqual(res[0]['refreshed'], res[1]['refreshed'])
- self.failUnlessEqual(res[0]['refreshed'], res[2]['refreshed'])
- self.failUnlessEqual(res[0]['refreshed'], res[3]['refreshed'])
- sleep(2)
- res = self.store.expiredHashes(1)
- self.failUnlessEqual(len(res.keys()), 1)
- self.failUnlessEqual(res.keys()[0], self.hash)
- self.store.refreshHash(self.hash)
- res = self.store.expiredHashes(1)
- self.failUnlessEqual(len(res.keys()), 0)
-
- def test_removeUntracked(self):
- """Tests removing untracked files from the database."""
- self.build_dirs()
- res = self.store.removeUntrackedFiles(self.dirs)
- self.failUnlessEqual(len(res), 1, 'Got removed paths: %r' % res)
- self.failUnlessEqual(res[0], self.file, 'Got removed paths: %r' % res)
- res = self.store.removeUntrackedFiles(self.dirs)
- self.failUnlessEqual(len(res), 0, 'Got removed paths: %r' % res)
- res = self.store.removeUntrackedFiles(self.dirs[1:])
- self.failUnlessEqual(len(res), 1, 'Got removed paths: %r' % res)
- self.failUnlessEqual(res[0], self.dirs[0].preauthChild(self.testfile), 'Got removed paths: %r' % res)
- res = self.store.removeUntrackedFiles(self.dirs[:1])
- self.failUnlessEqual(len(res), 2, 'Got removed paths: %r' % res)
- self.failUnlessIn(self.dirs[1].preauthChild(self.testfile), res, 'Got removed paths: %r' % res)
- self.failUnlessIn(self.dirs[2].preauthChild(self.testfile), res, 'Got removed paths: %r' % res)
-
- def tearDown(self):
- self.directory.remove()
- self.store.close()
- self.db.remove()
-
+++ /dev/null
-
-"""Some interfaces that are used by the apt-p2p classes."""
-
-from zope.interface import Interface
-
-class IDHT(Interface):
- """An abstract interface for using a DHT implementation."""
-
-    def loadConfig(self, config, section):
-        """Load the DHT's configuration.
-
-        @type config: C{SafeConfigParser}
-        @param config: the parsed configuration values
-        @type section: C{string}
-        @param section: the section of the configuration that applies to the DHT
-        """
-
- def join(self):
- """Bootstrap the new DHT node into the DHT.
-
- @rtype: C{Deferred}
- @return: a deferred that will fire when the node has joined
- """
-
- def leave(self):
- """Depart gracefully from the DHT.
-
- @rtype: C{Deferred}
- @return: a deferred that will fire when the node has left
- """
-
- def getValue(self, key):
- """Get a value from the DHT for the specified key.
-
- The length of the key may be adjusted for use with the DHT.
-
- @rtype: C{Deferred}
- @return: a deferred that will fire with the stored values
- """
-
- def storeValue(self, key, value):
- """Store a value in the DHT for the specified key.
-
- The length of the key may be adjusted for use with the DHT.
- """
+++ /dev/null
-# -*- test-case-name: twisted.test.test_policies -*-
-# Copyright (c) 2001-2007 Twisted Matrix Laboratories.
-# See LICENSE for details.
-
-
-"""
-Resource limiting policies.
-
-@seealso: See also L{twisted.protocols.htb} for rate limiting.
-"""
-
-# system imports
-import sys, operator
-
-# twisted imports
-from twisted.internet.protocol import ServerFactory, Protocol, ClientFactory
-from twisted.internet import reactor, error
-from twisted.python import log
-from zope.interface import providedBy, directlyProvides
-
-
-class ProtocolWrapper(Protocol):
- """Wraps protocol instances and acts as their transport as well."""
-
- disconnecting = 0
-
- def __init__(self, factory, wrappedProtocol):
- self.wrappedProtocol = wrappedProtocol
- self.factory = factory
-
- def makeConnection(self, transport):
- directlyProvides(self, *providedBy(self) + providedBy(transport))
- Protocol.makeConnection(self, transport)
-
- # Transport relaying
-
- def write(self, data):
- self.transport.write(data)
-
- def writeSequence(self, data):
- self.transport.writeSequence(data)
-
- def loseConnection(self):
- self.disconnecting = 1
- self.transport.loseConnection()
-
- def getPeer(self):
- return self.transport.getPeer()
-
- def getHost(self):
- return self.transport.getHost()
-
- def registerProducer(self, producer, streaming):
- self.transport.registerProducer(producer, streaming)
-
- def unregisterProducer(self):
- self.transport.unregisterProducer()
-
- def stopConsuming(self):
- self.transport.stopConsuming()
-
- def __getattr__(self, name):
- return getattr(self.transport, name)
-
- # Protocol relaying
-
- def connectionMade(self):
- self.factory.registerProtocol(self)
- self.wrappedProtocol.makeConnection(self)
-
- def dataReceived(self, data):
- self.wrappedProtocol.dataReceived(data)
-
- def connectionLost(self, reason):
- self.factory.unregisterProtocol(self)
- self.wrappedProtocol.connectionLost(reason)
-
-
-class WrappingFactory(ClientFactory):
- """Wraps a factory and its protocols, and keeps track of them."""
-
- protocol = ProtocolWrapper
-
- def __init__(self, wrappedFactory):
- self.wrappedFactory = wrappedFactory
- self.protocols = {}
-
- def doStart(self):
- self.wrappedFactory.doStart()
- ClientFactory.doStart(self)
-
- def doStop(self):
- self.wrappedFactory.doStop()
- ClientFactory.doStop(self)
-
- def startedConnecting(self, connector):
- self.wrappedFactory.startedConnecting(connector)
-
- def clientConnectionFailed(self, connector, reason):
- self.wrappedFactory.clientConnectionFailed(connector, reason)
-
- def clientConnectionLost(self, connector, reason):
- self.wrappedFactory.clientConnectionLost(connector, reason)
-
- def buildProtocol(self, addr):
- return self.protocol(self, self.wrappedFactory.buildProtocol(addr))
-
- def registerProtocol(self, p):
- """Called by protocol to register itself."""
- self.protocols[p] = 1
-
- def unregisterProtocol(self, p):
- """Called by protocols when they go away."""
- del self.protocols[p]
-
-
-class ThrottlingProtocol(ProtocolWrapper):
- """Protocol for ThrottlingFactory."""
-
- # wrap API for tracking bandwidth
-
- def __init__(self, factory, wrappedProtocol):
- ProtocolWrapper.__init__(self, factory, wrappedProtocol)
- self._tempDataBuffer = []
- self._tempDataLength = 0
- self.throttled = False
-
- def write(self, data):
- # Check if we can write
- if not self.throttled:
- paused = self.factory.registerWritten(len(data))
- if not paused:
- ProtocolWrapper.write(self, data)
-
- if paused is not None and hasattr(self, "producer") and self.producer and not self.producer.paused:
-                # Interrupt the flow so that others can have a chance.
-                # We can only do this if it's not already paused, otherwise we
-                # risk unpausing something that the Server paused
- self.producer.pauseProducing()
- reactor.callLater(0, self.producer.resumeProducing)
-
- if self.throttled or paused:
- # Can't write, buffer the data
- self._tempDataBuffer.append(data)
- self._tempDataLength += len(data)
- self._throttleWrites()
-
- def writeSequence(self, seq):
- if not self.throttled:
- # Write each sequence separately
- while seq and not self.factory.registerWritten(len(seq[0])):
- ProtocolWrapper.write(self, seq.pop(0))
-
- # If there's some left, we must have been paused
- if seq:
- self._tempDataBuffer.extend(seq)
- self._tempDataLength += reduce(operator.add, map(len, seq))
- self._throttleWrites()
-
- def dataReceived(self, data):
- self.factory.registerRead(len(data))
- ProtocolWrapper.dataReceived(self, data)
-
- def registerProducer(self, producer, streaming):
- assert streaming, "You can only use the ThrottlingProtocol with streaming (push) producers."
- self.producer = producer
- ProtocolWrapper.registerProducer(self, producer, streaming)
-
- def unregisterProducer(self):
- del self.producer
- ProtocolWrapper.unregisterProducer(self)
-
-
- def throttleReads(self):
- self.transport.pauseProducing()
-
- def unthrottleReads(self):
- self.transport.resumeProducing()
-
- def _throttleWrites(self):
- # If we haven't yet, queue for unthrottling
- if not self.throttled:
- self.throttled = True
- self.factory.throttledWrites(self)
-
- if hasattr(self, "producer") and self.producer:
- self.producer.pauseProducing()
-
- def unthrottleWrites(self):
- # Write some data
- if self._tempDataBuffer:
- assert not self.factory.registerWritten(len(self._tempDataBuffer[0]))
- self._tempDataLength -= len(self._tempDataBuffer[0])
- ProtocolWrapper.write(self, self._tempDataBuffer.pop(0))
- assert self._tempDataLength >= 0
-
- # If we wrote it all, start producing more
- if not self._tempDataBuffer:
- assert self._tempDataLength == 0
- self.throttled = False
- if hasattr(self, "producer") and self.producer:
- # This might unpause something the Server has also paused, but
- # it will get paused again on first write anyway
- reactor.callLater(0, self.producer.resumeProducing)
-
- return self._tempDataLength
-
-
-class ThrottlingFactory(WrappingFactory):
- """
- Throttles bandwidth and number of connections.
-
- Write bandwidth will only be throttled if there is a producer
- registered.
- """
-
- protocol = ThrottlingProtocol
- CHUNK_SIZE = 4*1024
-
- def __init__(self, wrappedFactory, maxConnectionCount=sys.maxint,
- readLimit=None, writeLimit=None):
- WrappingFactory.__init__(self, wrappedFactory)
- self.connectionCount = 0
- self.maxConnectionCount = maxConnectionCount
- self.readLimit = readLimit # max bytes we should read per second
- self.writeLimit = writeLimit # max bytes we should write per second
- self.readThisSecond = 0
- self.writeAvailable = writeLimit
- self._writeQueue = []
- self.unthrottleReadsID = None
- self.checkReadBandwidthID = None
- self.unthrottleWritesID = None
- self.checkWriteBandwidthID = None
-
-
- def callLater(self, period, func):
- """
-        Wrapper around L{reactor.callLater} for test purposes.
- """
- return reactor.callLater(period, func)
-
-
- def registerWritten(self, length):
- """
- Called by protocol to tell us more bytes were written.
- Returns True if the bytes could not be written and the protocol should pause itself.
- """
- # Check if there are bytes available to write
- if self.writeLimit is None:
- return None
- elif self.writeAvailable > 0:
- self.writeAvailable -= length
- return False
-
- return True
-
-
- def throttledWrites(self, p):
- """
- Called by the protocol to queue it for later writing.
- """
- assert p not in self._writeQueue
- self._writeQueue.append(p)
-
-
- def registerRead(self, length):
- """
- Called by protocol to tell us more bytes were read.
- """
- self.readThisSecond += length
-
-
- def checkReadBandwidth(self):
- """
- Checks if we've passed bandwidth limits.
- """
- if self.readThisSecond > self.readLimit:
- self.throttleReads()
- throttleTime = (float(self.readThisSecond) / self.readLimit) - 1.0
- self.unthrottleReadsID = self.callLater(throttleTime,
- self.unthrottleReads)
- self.readThisSecond = 0
- self.checkReadBandwidthID = self.callLater(1, self.checkReadBandwidth)
-
-
- def checkWriteBandwidth(self):
- """
- Add some new available bandwidth, and check for protocols to unthrottle.
- """
- # Increase the available write bytes, but not higher than the limit
- self.writeAvailable = min(self.writeLimit, self.writeAvailable + self.writeLimit)
-
- # Write from the queue until it's empty or we're throttled again
- while self.writeAvailable > 0 and self._writeQueue:
- # Get the first queued protocol
- p = self._writeQueue.pop(0)
- _tempWriteAvailable = self.writeAvailable
- bytesLeft = 1
-
- # Unthrottle writes until CHUNK_SIZE is reached or the protocol is unbuffered
- while self.writeAvailable > 0 and _tempWriteAvailable - self.writeAvailable < self.CHUNK_SIZE and bytesLeft > 0:
- # Unthrottle a single write (from the protocol's buffer)
- bytesLeft = p.unthrottleWrites()
-
- # If the protocol is not done, requeue it
- if bytesLeft > 0:
- self._writeQueue.append(p)
-
- self.checkWriteBandwidthID = self.callLater(1, self.checkWriteBandwidth)
-
-
- def throttleReads(self):
- """
- Throttle reads on all protocols.
- """
- log.msg("Throttling reads on %s" % self)
- for p in self.protocols.keys():
- p.throttleReads()
-
-
- def unthrottleReads(self):
- """
- Stop throttling reads on all protocols.
- """
- self.unthrottleReadsID = None
- log.msg("Stopped throttling reads on %s" % self)
- for p in self.protocols.keys():
- p.unthrottleReads()
-
-
- def buildProtocol(self, addr):
- if self.connectionCount == 0:
- if self.readLimit is not None:
- self.checkReadBandwidth()
- if self.writeLimit is not None:
- self.checkWriteBandwidth()
-
- if self.connectionCount < self.maxConnectionCount:
- self.connectionCount += 1
- return WrappingFactory.buildProtocol(self, addr)
- else:
- log.msg("Max connection count reached!")
- return None
-
-
- def unregisterProtocol(self, p):
- WrappingFactory.unregisterProtocol(self, p)
- self.connectionCount -= 1
- if self.connectionCount == 0:
- if self.unthrottleReadsID is not None:
- self.unthrottleReadsID.cancel()
- if self.checkReadBandwidthID is not None:
- self.checkReadBandwidthID.cancel()
- if self.unthrottleWritesID is not None:
- self.unthrottleWritesID.cancel()
- if self.checkWriteBandwidthID is not None:
- self.checkWriteBandwidthID.cancel()
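-
-# Illustrative usage sketch (MyServerFactory and the limit values are
-# hypothetical; read/write limits are in bytes per second):
-#
-#   factory = ThrottlingFactory(MyServerFactory(),
-#                               maxConnectionCount=100,
-#                               readLimit=128*1024,
-#                               writeLimit=128*1024)
-#   reactor.listenTCP(8080, factory)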
-
-
-
-class SpewingProtocol(ProtocolWrapper):
- def dataReceived(self, data):
- log.msg("Received: %r" % data)
- ProtocolWrapper.dataReceived(self,data)
-
- def write(self, data):
- log.msg("Sending: %r" % data)
- ProtocolWrapper.write(self,data)
-
-
-
-class SpewingFactory(WrappingFactory):
- protocol = SpewingProtocol
-
-
-
-class LimitConnectionsByPeer(WrappingFactory):
- """Stability: Unstable"""
-
- maxConnectionsPerPeer = 5
-
- def startFactory(self):
- self.peerConnections = {}
-
- def buildProtocol(self, addr):
-        peerHost = addr.host
- connectionCount = self.peerConnections.get(peerHost, 0)
- if connectionCount >= self.maxConnectionsPerPeer:
- return None
- self.peerConnections[peerHost] = connectionCount + 1
- return WrappingFactory.buildProtocol(self, addr)
-
- def unregisterProtocol(self, p):
-        peerHost = p.getPeer().host
- self.peerConnections[peerHost] -= 1
- if self.peerConnections[peerHost] == 0:
- del self.peerConnections[peerHost]
-
-
-class LimitTotalConnectionsFactory(ServerFactory):
- """Factory that limits the number of simultaneous connections.
-
- API Stability: Unstable
-
- @type connectionCount: C{int}
- @ivar connectionCount: number of current connections.
- @type connectionLimit: C{int} or C{None}
- @cvar connectionLimit: maximum number of connections.
- @type overflowProtocol: L{Protocol} or C{None}
- @cvar overflowProtocol: Protocol to use for new connections when
- connectionLimit is exceeded. If C{None} (the default value), excess
- connections will be closed immediately.
- """
- connectionCount = 0
- connectionLimit = None
- overflowProtocol = None
-
- def buildProtocol(self, addr):
- if (self.connectionLimit is None or
- self.connectionCount < self.connectionLimit):
- # Build the normal protocol
- wrappedProtocol = self.protocol()
- elif self.overflowProtocol is None:
- # Just drop the connection
- return None
- else:
- # Too many connections, so build the overflow protocol
- wrappedProtocol = self.overflowProtocol()
-
- wrappedProtocol.factory = self
- protocol = ProtocolWrapper(self, wrappedProtocol)
- self.connectionCount += 1
- return protocol
-
- def registerProtocol(self, p):
- pass
-
- def unregisterProtocol(self, p):
- self.connectionCount -= 1
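-
-# Illustrative usage sketch (MyProtocol and BusyProtocol are hypothetical;
-# overflowProtocol gives excess connections a polite rejection instead of
-# an immediate close):
-#
-#   class BusyProtocol(Protocol):
-#       def connectionMade(self):
-#           self.transport.write('Too busy, try again later.\r\n')
-#           self.transport.loseConnection()
-#
-#   factory = LimitTotalConnectionsFactory()
-#   factory.protocol = MyProtocol
-#   factory.connectionLimit = 50
-#   factory.overflowProtocol = BusyProtocol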
-
-
-
-class TimeoutProtocol(ProtocolWrapper):
- """
- Protocol that automatically disconnects when the connection is idle.
-
- Stability: Unstable
- """
-
- def __init__(self, factory, wrappedProtocol, timeoutPeriod):
- """
- Constructor.
-
- @param factory: An L{IFactory}.
-        @param wrappedProtocol: A L{Protocol} to wrap.
- @param timeoutPeriod: Number of seconds to wait for activity before
- timing out.
- """
- ProtocolWrapper.__init__(self, factory, wrappedProtocol)
- self.timeoutCall = None
- self.setTimeout(timeoutPeriod)
-
-
- def setTimeout(self, timeoutPeriod=None):
- """
- Set a timeout.
-
- This will cancel any existing timeouts.
-
- @param timeoutPeriod: If not C{None}, change the timeout period.
- Otherwise, use the existing value.
- """
- self.cancelTimeout()
- if timeoutPeriod is not None:
- self.timeoutPeriod = timeoutPeriod
- self.timeoutCall = self.factory.callLater(self.timeoutPeriod, self.timeoutFunc)
-
-
- def cancelTimeout(self):
- """
- Cancel the timeout.
-
- If the timeout was already cancelled, this does nothing.
- """
- if self.timeoutCall:
- try:
- self.timeoutCall.cancel()
- except error.AlreadyCalled:
- pass
- self.timeoutCall = None
-
-
- def resetTimeout(self):
- """
- Reset the timeout, usually because some activity just happened.
- """
- if self.timeoutCall:
- self.timeoutCall.reset(self.timeoutPeriod)
-
-
- def write(self, data):
- self.resetTimeout()
- ProtocolWrapper.write(self, data)
-
-
- def writeSequence(self, seq):
- self.resetTimeout()
- ProtocolWrapper.writeSequence(self, seq)
-
-
- def dataReceived(self, data):
- self.resetTimeout()
- ProtocolWrapper.dataReceived(self, data)
-
-
- def connectionLost(self, reason):
- self.cancelTimeout()
- ProtocolWrapper.connectionLost(self, reason)
-
-
- def timeoutFunc(self):
- """
- This method is called when the timeout is triggered.
-
- By default it calls L{loseConnection}. Override this if you want
- something else to happen.
- """
- self.loseConnection()
-
-
-
-class TimeoutFactory(WrappingFactory):
- """
- Factory for TimeoutWrapper.
-
- Stability: Unstable
- """
- protocol = TimeoutProtocol
-
-
- def __init__(self, wrappedFactory, timeoutPeriod=30*60):
- self.timeoutPeriod = timeoutPeriod
- WrappingFactory.__init__(self, wrappedFactory)
-
-
- def buildProtocol(self, addr):
- return self.protocol(self, self.wrappedFactory.buildProtocol(addr),
- timeoutPeriod=self.timeoutPeriod)
-
-
- def callLater(self, period, func):
- """
-        Wrapper around L{reactor.callLater} for test purposes.
- """
- return reactor.callLater(period, func)
-
-
-
-class TrafficLoggingProtocol(ProtocolWrapper):
-
- def __init__(self, factory, wrappedProtocol, logfile, lengthLimit=None,
- number=0):
- """
- @param factory: factory which created this protocol.
- @type factory: C{protocol.Factory}.
- @param wrappedProtocol: the underlying protocol.
- @type wrappedProtocol: C{protocol.Protocol}.
- @param logfile: file opened for writing used to write log messages.
- @type logfile: C{file}
-        @param lengthLimit: maximum length of received data to log.
- @type lengthLimit: C{int}
- @param number: identifier of the connection.
- @type number: C{int}.
- """
- ProtocolWrapper.__init__(self, factory, wrappedProtocol)
- self.logfile = logfile
- self.lengthLimit = lengthLimit
- self._number = number
-
-
- def _log(self, line):
- self.logfile.write(line + '\n')
- self.logfile.flush()
-
-
- def _mungeData(self, data):
- if self.lengthLimit and len(data) > self.lengthLimit:
- data = data[:self.lengthLimit - 12] + '<... elided>'
- return data
-
-
- # IProtocol
- def connectionMade(self):
- self._log('*')
- return ProtocolWrapper.connectionMade(self)
-
-
- def dataReceived(self, data):
- self._log('C %d: %r' % (self._number, self._mungeData(data)))
- return ProtocolWrapper.dataReceived(self, data)
-
-
- def connectionLost(self, reason):
- self._log('C %d: %r' % (self._number, reason))
- return ProtocolWrapper.connectionLost(self, reason)
-
-
- # ITransport
- def write(self, data):
- self._log('S %d: %r' % (self._number, self._mungeData(data)))
- return ProtocolWrapper.write(self, data)
-
-
- def writeSequence(self, iovec):
- self._log('SV %d: %r' % (self._number, [self._mungeData(d) for d in iovec]))
- return ProtocolWrapper.writeSequence(self, iovec)
-
-
- def loseConnection(self):
- self._log('S %d: *' % (self._number,))
- return ProtocolWrapper.loseConnection(self)
-
-
-
-class TrafficLoggingFactory(WrappingFactory):
- protocol = TrafficLoggingProtocol
-
- _counter = 0
-
- def __init__(self, wrappedFactory, logfilePrefix, lengthLimit=None):
- self.logfilePrefix = logfilePrefix
- self.lengthLimit = lengthLimit
- WrappingFactory.__init__(self, wrappedFactory)
-
-
- def open(self, name):
- return file(name, 'w')
-
-
- def buildProtocol(self, addr):
- self._counter += 1
- logfile = self.open(self.logfilePrefix + '-' + str(self._counter))
- return self.protocol(self, self.wrappedFactory.buildProtocol(addr),
- logfile, self.lengthLimit, self._counter)
-
-
- def resetCounter(self):
- """
- Reset the value of the counter used to identify connections.
- """
- self._counter = 0
-
-
-
-class TimeoutMixin:
- """Mixin for protocols which wish to timeout connections
-
- @cvar timeOut: The number of seconds after which to timeout the connection.
- """
- timeOut = None
-
- __timeoutCall = None
-
- def callLater(self, period, func):
- return reactor.callLater(period, func)
-
-
- def resetTimeout(self):
- """Reset the timeout count down"""
- if self.__timeoutCall is not None and self.timeOut is not None:
- self.__timeoutCall.reset(self.timeOut)
-
- def setTimeout(self, period):
- """Change the timeout period
-
- @type period: C{int} or C{NoneType}
- @param period: The period, in seconds, to change the timeout to, or
- C{None} to disable the timeout.
- """
- prev = self.timeOut
- self.timeOut = period
-
- if self.__timeoutCall is not None:
- if period is None:
- self.__timeoutCall.cancel()
- self.__timeoutCall = None
- else:
- self.__timeoutCall.reset(period)
- elif period is not None:
- self.__timeoutCall = self.callLater(period, self.__timedOut)
-
- return prev
-
- def __timedOut(self):
- self.__timeoutCall = None
- self.timeoutConnection()
-
- def timeoutConnection(self):
- """Called when the connection times out.
- Override to define behavior other than dropping the connection.
- """
- self.transport.loseConnection()
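-
-# Illustrative usage sketch of TimeoutMixin (PingProtocol is hypothetical):
-#
-#   class PingProtocol(Protocol, TimeoutMixin):
-#       def connectionMade(self):
-#           self.setTimeout(60)
-#       def dataReceived(self, data):
-#           self.resetTimeout()
-#       def timeoutConnection(self):
-#           self.transport.loseConnection()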
+++ /dev/null
-
-"""Some utitlity functions for use in the apt-p2p program.
-
-@var isLocal: a compiled regular expression suitable for testing if an
- IP address is from a known local or private range
-"""
-
-import os, re
-
-from twisted.python import log
-from twisted.trial import unittest
-
-isLocal = re.compile('^(192\.168\.[0-9]{1,3}\.[0-9]{1,3})|'+
- '(10\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})|'+
-                     '(172\.0?(1[6-9]|2[0-9]|3[0-1])\.[0-9]{1,3}\.[0-9]{1,3})|'+
- '(127\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})$')
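-
-# For example (sketch): isLocal.match('192.168.1.5') and
-# isLocal.match('10.0.0.1') succeed, while isLocal.match('8.8.8.8')
-# returns None.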
-
-def findMyIPAddr(addrs, intended_port, local_ok = False):
- """Find the best IP address to use from a list of possibilities.
-
- @param addrs: the list of possible IP addresses
- @param intended_port: the port that was supposed to be used
- @param local_ok: whether known local/private IP ranges are allowed
- (defaults to False)
- @return: the preferred IP address, or None if one couldn't be found
- """
- log.msg("got addrs: %r" % (addrs,))
- my_addr = None
-
- # Try to find an address using the ifconfig function
- try:
- ifconfig = os.popen("/sbin/ifconfig |/bin/grep inet|"+
- "/usr/bin/awk '{print $2}' | "+
- "sed -e s/.*://", "r").read().strip().split('\n')
- except:
- ifconfig = []
-
- # Get counts for all the non-local addresses returned from ifconfig
- addr_count = {}
- for addr in ifconfig:
- if local_ok or not isLocal.match(addr):
- addr_count.setdefault(addr, 0)
- addr_count[addr] += 1
-
- # If only one was found, use it as a starting point
- local_addrs = addr_count.keys()
- if len(local_addrs) == 1:
- my_addr = local_addrs[0]
- log.msg('Found remote address from ifconfig: %r' % (my_addr,))
-
- # Get counts for all the non-local addresses returned from the DHT
- addr_count = {}
- port_count = {}
- for addr in addrs:
- if local_ok or not isLocal.match(addr[0]):
- addr_count.setdefault(addr[0], 0)
- addr_count[addr[0]] += 1
- port_count.setdefault(addr[1], 0)
- port_count[addr[1]] += 1
-
- # Find the most popular address
- popular_addr = []
- popular_count = 0
- for addr in addr_count:
- if addr_count[addr] > popular_count:
- popular_addr = [addr]
- popular_count = addr_count[addr]
- elif addr_count[addr] == popular_count:
- popular_addr.append(addr)
-
- # Find the most popular port
- popular_port = []
- popular_count = 0
- for port in port_count:
- if port_count[port] > popular_count:
- popular_port = [port]
- popular_count = port_count[port]
- elif port_count[port] == popular_count:
- popular_port.append(port)
-
- # Check to make sure the port isn't being changed
- port = intended_port
- if len(port_count.keys()) > 1:
- log.msg('Problem, multiple ports have been found: %r' % (port_count,))
- if port not in port_count.keys():
- log.msg('And none of the ports found match the intended one')
- elif len(port_count.keys()) == 1:
- port = port_count.keys()[0]
- else:
- log.msg('Port was not found')
-
- # If one is popular, use that address
- if len(popular_addr) == 1:
- log.msg('Found popular address: %r' % (popular_addr[0],))
- if my_addr and my_addr != popular_addr[0]:
- log.msg('But the popular address does not match: %s != %s' % (popular_addr[0], my_addr))
- my_addr = popular_addr[0]
- elif len(popular_addr) > 1:
- log.msg('Found multiple popular addresses: %r' % (popular_addr,))
- if my_addr and my_addr not in popular_addr:
- log.msg('And none of the addresses found match the ifconfig one')
- else:
- log.msg('No non-local addresses found: %r' % (popular_addr,))
-
- if not my_addr:
- log.msg("Remote IP Address could not be found for this machine")
-
- return my_addr
-
-def ipAddrFromChicken():
- """Retrieve a possible IP address from the ipchecken website."""
- import urllib
- ip_search = re.compile('\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
- try:
- f = urllib.urlopen("http://www.ipchicken.com")
- data = f.read()
- f.close()
- current_ip = ip_search.findall(data)
- return current_ip
- except Exception:
- return []
-
-def uncompact(s):
- """Extract the contact info from a compact peer representation.
-
- @type s: C{string}
- @param s: the compact representation
- @rtype: (C{string}, C{int})
- @return: the IP address and port number to contact the peer on
-    @raise ValueError: if the compact representation is invalid
- """
-    if len(s) != 6:
- raise ValueError
- ip = '.'.join([str(ord(i)) for i in s[0:4]])
- port = (ord(s[4]) << 8) | ord(s[5])
- return (ip, port)
-
-def compact(ip, port):
- """Create a compact representation of peer contact info.
-
- @type ip: C{string}
- @param ip: the IP address of the peer
- @type port: C{int}
- @param port: the port number to contact the peer on
- @rtype: C{string}
- @return: the compact representation
-    @raise ValueError: if a compact representation cannot be created
- """
-
- s = ''.join([chr(int(i)) for i in ip.split('.')]) + \
- chr((port & 0xFF00) >> 8) + chr(port & 0xFF)
- if len(s) != 6:
- raise ValueError
- return s
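-
-# Worked example (sketch): compact('10.0.0.1', 9977) packs the address into
-# the 6-byte string '\x0a\x00\x00\x01\x26\xf9' (9977 == 0x26F9, port in
-# big-endian order), and uncompact of that string returns ('10.0.0.1', 9977).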
-
-class TestUtil(unittest.TestCase):
- """Tests for the utilities."""
-
- timeout = 5
- ip = '165.234.1.34'
- port = 61234
-
- def test_compact(self):
- """Make sure compacting is reversed correctly by uncompacting."""
- d = uncompact(compact(self.ip, self.port))
- self.failUnlessEqual(d[0], self.ip)
- self.failUnlessEqual(d[1], self.port)
+++ /dev/null
-
-"""The main interface to the Khashmir DHT.
-
-@var khashmir_dir: the name of the directory to use for DHT files
-"""
-
-from datetime import datetime
-import os, sha, random
-
-from twisted.internet import defer, reactor
-from twisted.internet.abstract import isIPAddress
-from twisted.python import log
-from twisted.trial import unittest
-from zope.interface import implements
-
-from apt_p2p.interfaces import IDHT
-from khashmir import Khashmir
-from bencode import bencode, bdecode
-
-khashmir_dir = 'apt-p2p-Khashmir'
-
-class DHTError(Exception):
- """Represents errors that occur in the DHT."""
-
-class DHT:
- """The main interface instance to the Khashmir DHT.
-
- @type config: C{dictionary}
- @ivar config: the DHT configuration values
- @type cache_dir: C{string}
- @ivar cache_dir: the directory to use for storing files
- @type bootstrap: C{list} of C{string}
- @ivar bootstrap: the nodes to contact to bootstrap into the system
- @type bootstrap_node: C{boolean}
- @ivar bootstrap_node: whether this node is a bootstrap node
- @type joining: L{twisted.internet.defer.Deferred}
-    @ivar joining: if a join is underway, the deferred that will signal its end
- @type joined: C{boolean}
- @ivar joined: whether the DHT network has been successfully joined
- @type outstandingJoins: C{int}
- @ivar outstandingJoins: the number of bootstrap nodes that have yet to respond
- @type foundAddrs: C{list} of (C{string}, C{int})
-    @ivar foundAddrs: the IP address and port that were returned by bootstrap nodes
- @type storing: C{dictionary}
- @ivar storing: keys are keys for which store requests are active, values
- are dictionaries with keys the values being stored and values the
- deferred to call when complete
- @type retrieving: C{dictionary}
- @ivar retrieving: keys are the keys for which getValue requests are active,
- values are lists of the deferreds waiting for the requests
- @type retrieved: C{dictionary}
- @ivar retrieved: keys are the keys for which getValue requests are active,
- values are list of the values returned so far
- @type config_parser: L{apt_p2p.apt_p2p_conf.AptP2PConfigParser}
- @ivar config_parser: the configuration info for the main program
- @type section: C{string}
- @ivar section: the section of the configuration info that applies to the DHT
- @type khashmir: L{khashmir.Khashmir}
- @ivar khashmir: the khashmir DHT instance to use
- """
-
- implements(IDHT)
-
- def __init__(self):
- """Initialize the DHT."""
- self.config = None
- self.cache_dir = ''
- self.bootstrap = []
- self.bootstrap_node = False
- self.joining = None
- self.joined = False
- self.outstandingJoins = 0
- self.foundAddrs = []
- self.storing = {}
- self.retrieving = {}
- self.retrieved = {}
-
- def loadConfig(self, config, section):
- """See L{apt_p2p.interfaces.IDHT}."""
- self.config_parser = config
- self.section = section
- self.config = {}
-
- # Get some initial values
- self.cache_dir = os.path.join(self.config_parser.get(section, 'cache_dir'), khashmir_dir)
- if not os.path.exists(self.cache_dir):
- os.makedirs(self.cache_dir)
- self.bootstrap = self.config_parser.getstringlist(section, 'BOOTSTRAP')
- self.bootstrap_node = self.config_parser.getboolean(section, 'BOOTSTRAP_NODE')
- for k in self.config_parser.options(section):
- # The numbers in the config file
- if k in ['K', 'HASH_LENGTH', 'CONCURRENT_REQS', 'STORE_REDUNDANCY',
- 'RETRIEVE_VALUES', 'MAX_FAILURES', 'PORT']:
- self.config[k] = self.config_parser.getint(section, k)
- # The times in the config file
- elif k in ['CHECKPOINT_INTERVAL', 'MIN_PING_INTERVAL',
- 'BUCKET_STALENESS', 'KEY_EXPIRE']:
- self.config[k] = self.config_parser.gettime(section, k)
- # The booleans in the config file
- elif k in ['SPEW']:
- self.config[k] = self.config_parser.getboolean(section, k)
- # Everything else is a string
- else:
- self.config[k] = self.config_parser.get(section, k)
-
- def join(self):
- """See L{apt_p2p.interfaces.IDHT}."""
- if self.config is None:
- raise DHTError, "configuration not loaded"
- if self.joining:
- raise DHTError, "a join is already in progress"
-
- # Create the new khashmir instance
- self.khashmir = Khashmir(self.config, self.cache_dir)
-
- self.joining = defer.Deferred()
- for node in self.bootstrap:
- host, port = node.rsplit(':', 1)
- port = int(port)
-
- # Translate host names into IP addresses
- if isIPAddress(host):
- self._join_gotIP(host, port)
- else:
- reactor.resolve(host).addCallback(self._join_gotIP, port)
-
- return self.joining
-
- def _join_gotIP(self, ip, port):
- """Join the DHT using a single bootstrap nodes IP address."""
- self.outstandingJoins += 1
- self.khashmir.addContact(ip, port, self._join_single, self._join_error)
-
- def _join_single(self, addr):
- """Process the response from the bootstrap node.
-
- Finish the join by contacting close nodes.
- """
- self.outstandingJoins -= 1
- if addr:
- self.foundAddrs.append(addr)
- if addr or self.outstandingJoins <= 0:
- self.khashmir.findCloseNodes(self._join_complete, self._join_complete)
- log.msg('Got back from bootstrap node: %r' % (addr,))
-
- def _join_error(self, failure = None):
- """Process an error in contacting the bootstrap node.
-
- If no bootstrap nodes remain, finish the process by contacting
- close nodes.
- """
- self.outstandingJoins -= 1
- log.msg("bootstrap node could not be reached")
- if self.outstandingJoins <= 0:
- self.khashmir.findCloseNodes(self._join_complete, self._join_complete)
-
- def _join_complete(self, result):
- """End the joining process and return the addresses found for this node."""
- if not self.joined and len(result) > 0:
- self.joined = True
- if self.joining and self.outstandingJoins <= 0:
- df = self.joining
- self.joining = None
- if self.joined or self.bootstrap_node:
- self.joined = True
- df.callback(self.foundAddrs)
- else:
- df.errback(DHTError('could not find any nodes to bootstrap to'))
-
- def getAddrs(self):
- """Get the list of addresses returned by bootstrap nodes for this node."""
- return self.foundAddrs
-
- def leave(self):
- """See L{apt_p2p.interfaces.IDHT}."""
- if self.config is None:
- raise DHTError, "configuration not loaded"
-
- if self.joined or self.joining:
- if self.joining:
- self.joining.errback(DHTError('still joining when leave was called'))
- self.joining = None
- self.joined = False
- self.khashmir.shutdown()
-
-    def _normKey(self, key, bits=None, bytes=None):
-        """Normalize the length of keys used in the DHT."""
-        if bits is None:
-            bits = self.config["HASH_LENGTH"]
-        if bits is not None:
-            bytes = (bits - 1) // 8 + 1
-        elif bytes is None:
-            raise DHTError, "you must specify one of bits or bytes for normalization"
-
- # Extend short keys with null bytes
- if len(key) < bytes:
- key = key + '\000'*(bytes - len(key))
- # Truncate long keys
- elif len(key) > bytes:
- key = key[:bytes]
- return key
-
- def getValue(self, key):
- """See L{apt_p2p.interfaces.IDHT}."""
- if self.config is None:
- raise DHTError, "configuration not loaded"
- if not self.joined:
- raise DHTError, "have not joined a network yet"
-
- key = self._normKey(key)
-
- d = defer.Deferred()
- if key not in self.retrieving:
- self.khashmir.valueForKey(key, self._getValue)
- self.retrieving.setdefault(key, []).append(d)
- return d
-
- def _getValue(self, key, result):
- """Process a returned list of values from the DHT."""
- # Save the list of values to return when it is complete
- if result:
- self.retrieved.setdefault(key, []).extend([bdecode(r) for r in result])
- else:
- # Empty list, the get is complete, return the result
- final_result = []
- if key in self.retrieved:
- final_result = self.retrieved[key]
- del self.retrieved[key]
-            for d in self.retrieving.pop(key):
-                d.callback(final_result)
-
- def storeValue(self, key, value):
- """See L{apt_p2p.interfaces.IDHT}."""
- if self.config is None:
- raise DHTError, "configuration not loaded"
- if not self.joined:
- raise DHTError, "have not joined a network yet"
-
- key = self._normKey(key)
- bvalue = bencode(value)
-
- if key in self.storing and bvalue in self.storing[key]:
- raise DHTError, "already storing that key with the same value"
-
- d = defer.Deferred()
- self.khashmir.storeValueForKey(key, bvalue, self._storeValue)
- self.storing.setdefault(key, {})[bvalue] = d
- return d
-
- def _storeValue(self, key, bvalue, result):
- """Process the response from the DHT."""
- if key in self.storing and bvalue in self.storing[key]:
- # Check if the store succeeded
- if len(result) > 0:
- self.storing[key][bvalue].callback(result)
- else:
- self.storing[key][bvalue].errback(DHTError('could not store value %s in key %s' % (bvalue, key)))
- del self.storing[key][bvalue]
-            if not self.storing[key]:
- del self.storing[key]
-
-class TestSimpleDHT(unittest.TestCase):
- """Simple 2-node unit tests for the DHT."""
-
- timeout = 2
- DHT_DEFAULTS = {'PORT': 9977, 'K': 8, 'HASH_LENGTH': 160,
- 'CHECKPOINT_INTERVAL': 300, 'CONCURRENT_REQS': 4,
- 'STORE_REDUNDANCY': 3, 'RETRIEVE_VALUES': -10000,
- 'MAX_FAILURES': 3,
- 'MIN_PING_INTERVAL': 900,'BUCKET_STALENESS': 3600,
- 'KEY_EXPIRE': 3600, 'SPEW': False, }
-
- def setUp(self):
- self.a = DHT()
- self.b = DHT()
- self.a.config = self.DHT_DEFAULTS.copy()
- self.a.config['PORT'] = 4044
- self.a.bootstrap = ["127.0.0.1:4044"]
- self.a.bootstrap_node = True
- self.a.cache_dir = '/tmp'
- self.b.config = self.DHT_DEFAULTS.copy()
- self.b.config['PORT'] = 4045
- self.b.bootstrap = ["127.0.0.1:4044"]
- self.b.cache_dir = '/tmp'
-
- def test_bootstrap_join(self):
- d = self.a.join()
- return d
-
- def node_join(self, result):
- d = self.b.join()
- return d
-
- def test_join(self):
- self.lastDefer = defer.Deferred()
- d = self.a.join()
- d.addCallback(self.node_join)
- d.addCallback(self.lastDefer.callback)
- return self.lastDefer
-
- def test_normKey(self):
- h = self.a._normKey('12345678901234567890')
- self.failUnless(h == '12345678901234567890')
- h = self.a._normKey('12345678901234567')
- self.failUnless(h == '12345678901234567\000\000\000')
- h = self.a._normKey('1234567890123456789012345')
- self.failUnless(h == '12345678901234567890')
- h = self.a._normKey('1234567890123456789')
- self.failUnless(h == '1234567890123456789\000')
- h = self.a._normKey('123456789012345678901')
- self.failUnless(h == '12345678901234567890')
-
- def value_stored(self, result, value):
- self.stored -= 1
- if self.stored == 0:
- self.get_values()
-
- def store_values(self, result):
- self.stored = 3
- d = self.a.storeValue(sha.new('4045').digest(), str(4045*3))
- d.addCallback(self.value_stored, 4045)
- d = self.a.storeValue(sha.new('4044').digest(), str(4044*2))
- d.addCallback(self.value_stored, 4044)
- d = self.b.storeValue(sha.new('4045').digest(), str(4045*2))
- d.addCallback(self.value_stored, 4045)
-
- def check_values(self, result, values):
- self.checked -= 1
- self.failUnless(len(result) == len(values))
- for v in result:
- self.failUnless(v in values)
- if self.checked == 0:
- self.lastDefer.callback(1)
-
- def get_values(self):
- self.checked = 4
- d = self.a.getValue(sha.new('4044').digest())
- d.addCallback(self.check_values, [str(4044*2)])
- d = self.b.getValue(sha.new('4044').digest())
- d.addCallback(self.check_values, [str(4044*2)])
- d = self.a.getValue(sha.new('4045').digest())
- d.addCallback(self.check_values, [str(4045*2), str(4045*3)])
- d = self.b.getValue(sha.new('4045').digest())
- d.addCallback(self.check_values, [str(4045*2), str(4045*3)])
-
- def test_store(self):
- from twisted.internet.base import DelayedCall
- DelayedCall.debug = True
- self.lastDefer = defer.Deferred()
- d = self.a.join()
- d.addCallback(self.node_join)
- d.addCallback(self.store_values)
- return self.lastDefer
-
- def tearDown(self):
- self.a.leave()
- try:
- os.unlink(self.a.khashmir.store.db)
- except:
- pass
- self.b.leave()
- try:
- os.unlink(self.b.khashmir.store.db)
- except:
- pass
-
-class TestMultiDHT(unittest.TestCase):
- """More complicated 20-node tests for the DHT."""
-
- timeout = 60
- num = 20
- DHT_DEFAULTS = {'PORT': 9977, 'K': 8, 'HASH_LENGTH': 160,
- 'CHECKPOINT_INTERVAL': 300, 'CONCURRENT_REQS': 4,
- 'STORE_REDUNDANCY': 3, 'RETRIEVE_VALUES': -10000,
- 'MAX_FAILURES': 3,
- 'MIN_PING_INTERVAL': 900,'BUCKET_STALENESS': 3600,
- 'KEY_EXPIRE': 3600, 'SPEW': False, }
-
- def setUp(self):
- self.l = []
- self.startport = 4081
- for i in range(self.num):
- self.l.append(DHT())
- self.l[i].config = self.DHT_DEFAULTS.copy()
- self.l[i].config['PORT'] = self.startport + i
- self.l[i].bootstrap = ["127.0.0.1:" + str(self.startport)]
- self.l[i].cache_dir = '/tmp'
- self.l[0].bootstrap_node = True
-
- def node_join(self, result, next_node):
- d = self.l[next_node].join()
- if next_node + 1 < len(self.l):
- d.addCallback(self.node_join, next_node + 1)
- else:
- d.addCallback(self.lastDefer.callback)
-
- def test_join(self):
- self.timeout = 2
- self.lastDefer = defer.Deferred()
- d = self.l[0].join()
- d.addCallback(self.node_join, 1)
- return self.lastDefer
-
- def store_values(self, result, i = 0, j = 0):
- if j > i:
- j -= i+1
- i += 1
- if i == len(self.l):
- self.get_values()
- else:
- d = self.l[j].storeValue(sha.new(str(self.startport+i)).digest(), str((self.startport+i)*(j+1)))
- d.addCallback(self.store_values, i, j+1)
-
- def get_values(self, result = None, check = None, i = 0, j = 0):
- if result is not None:
- self.failUnless(len(result) == len(check))
- for v in result:
- self.failUnless(v in check)
- if j >= len(self.l):
- j -= len(self.l)
- i += 1
- if i == len(self.l):
- self.lastDefer.callback(1)
- else:
- d = self.l[i].getValue(sha.new(str(self.startport+j)).digest())
- check = []
- for k in range(self.startport+j, (self.startport+j)*(j+1)+1, self.startport+j):
- check.append(str(k))
- d.addCallback(self.get_values, check, i, j + random.randrange(1, min(len(self.l), 10)))
-
- def store_join(self, result, next_node):
- d = self.l[next_node].join()
- if next_node + 1 < len(self.l):
- d.addCallback(self.store_join, next_node + 1)
- else:
- d.addCallback(self.store_values)
-
- def test_store(self):
- from twisted.internet.base import DelayedCall
- DelayedCall.debug = True
- self.lastDefer = defer.Deferred()
- d = self.l[0].join()
- d.addCallback(self.store_join, 1)
- return self.lastDefer
-
- def tearDown(self):
- for i in self.l:
- try:
- i.leave()
- os.unlink(i.khashmir.store.db)
- except:
- pass
+++ /dev/null
-
-"""The apt-p2p implementation of the Khashmir DHT.
-
-These modules implement a modified Khashmir, which is a kademlia-like
-Distributed Hash Table available at::
-
- http://khashmir.sourceforge.net/
-
-The protocol for the implementation's communication is described here::
-
- http://www.camrdale.org/apt-p2p/protocol.html
-
-To run the DHT you probably want to do something like::
-
- from apt_p2p_Khashmir import DHT
- myDHT = DHT.DHT()
- myDHT.loadConfig(config, section)
- myDHT.join()
-
-at which point you should be up and running and connected to others in the DHT.
-
-"""
+++ /dev/null
-## Copyright 2002-2004 Andrew Loewenstern, All Rights Reserved
-# see LICENSE.txt for license information
-
-"""Details of how to perform actions on remote peers."""
-
-from twisted.internet import reactor
-from twisted.python import log
-
-from khash import intify
-from util import uncompact
-
-class ActionBase:
- """Base class for some long running asynchronous proccesses like finding nodes or values.
-
- @type caller: L{khashmir.Khashmir}
- @ivar caller: the DHT instance that is performing the action
- @type target: C{string}
- @ivar target: the target of the action, usually a DHT key
- @type config: C{dictionary}
- @ivar config: the configuration variables for the DHT
- @type action: C{string}
- @ivar action: the name of the action to call on remote nodes
- @type num: C{long}
- @ivar num: the target key in integer form
- @type queried: C{dictionary}
- @ivar queried: the nodes that have been queried for this action,
- keys are node IDs, values are the node itself
- @type answered: C{dictionary}
- @ivar answered: the nodes that have answered the queries
- @type found: C{dictionary}
- @ivar found: nodes that have been found so far by the action
- @type sorted_nodes: C{list} of L{node.Node}
- @ivar sorted_nodes: a sorted list of nodes by their proximity to the key
- @type results: C{dictionary}
- @ivar results: keys are the results found so far by the action
- @type desired_results: C{int}
- @ivar desired_results: the minimum number of results that are needed
- before the action should stop
- @type callback: C{method}
- @ivar callback: the method to call with the results
- @type outstanding: C{int}
- @ivar outstanding: the number of requests currently outstanding
- @type outstanding_results: C{int}
- @ivar outstanding_results: the number of results that are expected from
- the requests that are currently outstanding
- @type finished: C{boolean}
- @ivar finished: whether the action is done
- @type sort: C{method}
- @ivar sort: used to sort nodes by their proximity to the target
- """
-
- def __init__(self, caller, target, callback, config, action, num_results = None):
- """Initialize the action.
-
- @type caller: L{khashmir.Khashmir}
- @param caller: the DHT instance that is performing the action
- @type target: C{string}
- @param target: the target of the action, usually a DHT key
- @type callback: C{method}
- @param callback: the method to call with the results
- @type config: C{dictionary}
- @param config: the configuration variables for the DHT
- @type action: C{string}
- @param action: the name of the action to call on remote nodes
- @type num_results: C{int}
- @param num_results: the minimum number of results that are needed before
- the action should stop (optional, defaults to getting all the results)
-
- """
-
- self.caller = caller
- self.target = target
- self.config = config
- self.action = action
- self.num = intify(target)
- self.queried = {}
- self.answered = {}
- self.found = {}
- self.sorted_nodes = []
- self.results = {}
- self.desired_results = num_results
- self.callback = callback
- self.outstanding = 0
- self.outstanding_results = 0
- self.finished = False
-
- def sort(a, b, num=self.num):
- """Sort nodes relative to the ID we are looking for."""
- x, y = num ^ a.num, num ^ b.num
- if x > y:
- return 1
- elif x < y:
- return -1
- return 0
- self.sort = sort
-
- #{ Main operation
- def goWithNodes(self, nodes):
- """Start the action's process with a list of nodes to contact."""
- for node in nodes:
- if node.id == self.caller.node.id:
- continue
- else:
- self.found[node.id] = node
- self.sortNodes()
- self.schedule()
-
- def schedule(self):
- """Schedule requests to be sent to remote nodes."""
- # Check if we are already done
- if self.desired_results and ((len(self.results) >= abs(self.desired_results)) or
- (self.desired_results < 0 and
- len(self.answered) >= self.config['STORE_REDUNDANCY'])):
- self.finished = True
- result = self.generateResult()
- reactor.callLater(0, self.callback, *result)
-
- if self.finished or (self.desired_results and
- len(self.results) + self.outstanding_results >= abs(self.desired_results)):
- return
-
- # Loop for each node that should be processed
- for node in self.getNodesToProcess():
- # Don't send requests twice or to ourself
- if node.id not in self.queried and node.id != self.caller.node.id:
- self.queried[node.id] = 1
-
- # Get the action to call on the node
- try:
- f = getattr(node, self.action)
- except AttributeError:
- log.msg("%s doesn't have a %s method!" % (node, self.action))
- else:
- # Get the arguments to the action's method
- try:
- args, expected_results = self.generateArgs(node)
- except ValueError:
- pass
- else:
- # Call the action on the remote node
- self.outstanding += 1
- self.outstanding_results += expected_results
- df = f(self.caller.node.id, *args)
- df.addCallbacks(self.gotResponse, self.actionFailed,
- callbackArgs = (node, expected_results),
- errbackArgs = (node, expected_results))
-
- # We might have to stop for now
- if (self.outstanding >= self.config['CONCURRENT_REQS'] or
- (self.desired_results and
- len(self.results) + self.outstanding_results >= abs(self.desired_results))):
- break
-
- assert self.outstanding >= 0
- assert self.outstanding_results >= 0
-
- # If no requests are outstanding, then we are done
- if self.outstanding == 0:
- self.finished = True
- result = self.generateResult()
- reactor.callLater(0, self.callback, *result)
-
- def gotResponse(self, dict, node, expected_results):
- """Receive a response from a remote node."""
- self.caller.insertNode(node)
- if self.finished or self.answered.has_key(node.id):
- # a day late and a dollar short
- return
- self.outstanding -= 1
- self.outstanding_results -= expected_results
- self.answered[node.id] = 1
- self.processResponse(dict['rsp'])
- self.schedule()
-
- def actionFailed(self, err, node, expected_results):
- """Receive an error from a remote node."""
- log.msg("action %s failed (%s) %s/%s" % (self.action, self.config['PORT'], node.host, node.port))
- log.err(err)
- self.caller.table.nodeFailed(node)
- self.outstanding -= 1
- self.outstanding_results -= expected_results
- self.schedule()
-
- def handleGotNodes(self, nodes):
- """Process any received node contact info in the response.
-
- Not called by default, but suitable for being called by
- L{processResponse} in a recursive node search.
- """
- for compact_node in nodes:
- node_contact = uncompact(compact_node)
- node = self.caller.Node(node_contact)
- if not self.found.has_key(node.id):
- self.found[node.id] = node
-
- def sortNodes(self):
- """Sort the nodes, if necessary.
-
- Assumes nodes are never removed from the L{found} dictionary.
- """
- if len(self.sorted_nodes) != len(self.found):
- self.sorted_nodes = self.found.values()
- self.sorted_nodes.sort(self.sort)
-
- #{ Subclass for specific actions
- def getNodesToProcess(self):
- """Generate a list of nodes to process next.
-
- This implementation is suitable for a recurring search over all nodes.
- """
- self.sortNodes()
- return self.sorted_nodes[:self.config['K']]
-
- def generateArgs(self, node):
- """Generate the arguments to the node's action.
-
- These arguments will be appended to our node ID when calling the action.
- Also return the number of results expected from this request.
-
- @raise ValueError: if the node should not be queried
- """
- return (self.target, ), 0
-
- def processResponse(self, dict):
- """Process the response dictionary received from the remote node."""
- self.handleGotNodes(dict['nodes'])
-
- def generateResult(self):
- """Create the final result to return to the L{callback} function."""
- return []
-
-
-class FindNode(ActionBase):
- """Find the closest nodes to the key."""
-
- def __init__(self, caller, target, callback, config, action="findNode"):
- ActionBase.__init__(self, caller, target, callback, config, action)
-
- def processResponse(self, dict):
- """Save the token received from each node."""
- if dict["id"] in self.found:
- self.found[dict["id"]].updateToken(dict.get('token', ''))
- self.handleGotNodes(dict['nodes'])
-
- def generateResult(self):
- """Result is the K closest nodes to the target."""
- self.sortNodes()
- return (self.sorted_nodes[:self.config['K']], )
-
-
-class FindValue(ActionBase):
- """Find the closest nodes to the key and check for values."""
-
- def __init__(self, caller, target, callback, config, action="findValue"):
- ActionBase.__init__(self, caller, target, callback, config, action)
-
- def processResponse(self, dict):
- """Save the number of values each node has."""
- if dict["id"] in self.found:
- self.found[dict["id"]].updateNumValues(dict.get('num', 0))
- self.handleGotNodes(dict['nodes'])
-
- def generateResult(self):
- """Result is the nodes that have values, sorted by proximity to the key."""
- self.sortNodes()
- return ([node for node in self.sorted_nodes if node.num_values > 0], )
-
-
-class GetValue(ActionBase):
- """Retrieve values from a list of nodes."""
-
- def __init__(self, caller, target, local_results, num_results, callback, config, action="getValue"):
- """Initialize the action with the locally available results.
-
- @type local_results: C{list} of C{string}
- @param local_results: the values that were available in this node
- """
- ActionBase.__init__(self, caller, target, callback, config, action, num_results)
- if local_results:
- for result in local_results:
- self.results[result] = 1
-
- def getNodesToProcess(self):
- """Nodes are never added, always return the same sorted node list."""
- return self.sorted_nodes
-
- def generateArgs(self, node):
- """Arguments include the number of values to request."""
- if node.num_values > 0:
- # Request all desired results from each node, just to be sure.
- num_values = abs(self.desired_results) - len(self.results)
- assert num_values > 0
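- # Requesting 0 values asks the peer for everything it has (the
- # remote krpc_get_value treats num == 0 as "return all values").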
- if num_values > node.num_values:
- num_values = 0
- return (self.target, num_values), node.num_values
- else:
- raise ValueError, "Don't try and get values from this node because it doesn't have any"
-
- def processResponse(self, dict):
- """Save the returned values, calling the L{callback} each time there are new ones."""
- if dict.has_key('values'):
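- # x() weeds out duplicates: it records values not yet seen in
- # self.results and returns them, and returns None (filtered out
- # below) for values this action has already collected.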
- def x(y, z=self.results):
- if not z.has_key(y):
- z[y] = 1
- return y
- else:
- return None
- z = len(dict['values'])
- v = filter(None, map(x, dict['values']))
- if len(v):
- reactor.callLater(0, self.callback, self.target, v)
-
- def generateResult(self):
- """Results have all been returned, now send the empty list to end the action."""
- return (self.target, [])
-
-
-class StoreValue(ActionBase):
- """Store a value in a list of nodes."""
-
- def __init__(self, caller, target, value, num_results, callback, config, action="storeValue"):
- """Initialize the action with the value to store.
-
- @type value: C{string}
- @param value: the value to store in the nodes
- """
- ActionBase.__init__(self, caller, target, callback, config, action, num_results)
- self.value = value
-
- def getNodesToProcess(self):
- """Nodes are never added, always return the same sorted list."""
- return self.sorted_nodes
-
- def generateArgs(self, node):
- """Args include the value to store and the node's token."""
- if node.token:
- return (self.target, self.value, node.token), 1
- else:
- raise ValueError, "Don't store at this node since we don't know it's token"
-
- def processResponse(self, dict):
- """Save the response, though it should be nothin but the ID."""
- self.results[dict["id"]] = dict
-
- def generateResult(self):
- """Return all the response IDs received."""
- return (self.target, self.value, self.results.values())
+++ /dev/null
-
-"""Functions for bencoding and bdecoding data.
-
-@type decode_func: C{dictionary} of C{function}
-@var decode_func: a dictionary of function calls to be made, based on data,
- the keys are the first character of the data and the value is the
- function to use to decode that data
-@type bencached_marker: C{list}
-@var bencached_marker: a mutable sentinel used to verify that data
- originated from the L{Bencached} class
-@type encode_func: C{dictionary} of C{function}
-@var encode_func: a dictionary of function calls to be made, based on data,
- the keys are the type of the data and the value is the
- function to use to encode that data
-@type BencachedType: C{type}
-@var BencachedType: the L{Bencached} type
-"""
-
-from types import IntType, LongType, StringType, ListType, TupleType, DictType, BooleanType
-try:
- from types import UnicodeType
-except ImportError:
- UnicodeType = None
-from datetime import datetime
-import time
-
-from twisted.python import log
-from twisted.trial import unittest
-
-class BencodeError(ValueError):
- pass
-
-def decode_int(x, f):
- """Bdecode an integer.
-
- @type x: C{string}
- @param x: the data to decode
- @type f: C{int}
- @param f: the offset in the data to start at
- @rtype: C{int}, C{int}
- @return: the bdecoded integer, and the offset to read next
- @raise BencodeError: if the data is improperly encoded
-
- """
-
- f += 1
- newf = x.index('e', f)
- try:
- n = int(x[f:newf])
- except:
- n = long(x[f:newf])
- if x[f] == '-':
- if x[f + 1] == '0':
- raise BencodeError, "integer has a leading zero after a negative sign"
- elif x[f] == '0' and newf != f+1:
- raise BencodeError, "integer has a leading zero"
- return (n, newf+1)
-
-def decode_string(x, f):
- """Bdecode a string.
-
- @type x: C{string}
- @param x: the data to decode
- @type f: C{int}
- @param f: the offset in the data to start at
- @rtype: C{string}, C{int}
- @return: the bdecoded string, and the offset to read next
- @raise BencodeError: if the data is improperly encoded
-
- """
-
- colon = x.index(':', f)
- try:
- n = int(x[f:colon])
- except (OverflowError, ValueError):
- n = long(x[f:colon])
- if x[f] == '0' and colon != f+1:
- raise BencodeError, "string length has a leading zero"
- colon += 1
- return (x[colon:colon+n], colon+n)
-
-def decode_unicode(x, f):
- """Bdecode a unicode string.
-
- @type x: C{string}
- @param x: the data to decode
- @type f: C{int}
- @param f: the offset in the data to start at
- @rtype: C{int}, C{int}
- @return: the bdecoded unicode string, and the offset to read next
-
- """
-
- s, f = decode_string(x, f+1)
- return (s.decode('UTF-8'),f)
-
-def decode_datetime(x, f):
- """Bdecode a datetime value.
-
- @type x: C{string}
- @param x: the data to decode
- @type f: C{int}
- @param f: the offset in the data to start at
- @rtype: C{datetime.datetime}, C{int}
-@return: the bdecoded datetime value, and the offset to read next
- @raise BencodeError: if the data is improperly encoded
-
- """
-
- f += 1
- newf = x.index('e', f)
- try:
- date = datetime(*(time.strptime(x[f:newf], '%Y-%m-%dT%H:%M:%S')[0:6]))
- except:
- raise BencodeError, "datetime value could not be decoded: %s" % x[f:newf]
- return (date, newf+1)
-
-def decode_list(x, f):
- """Bdecode a list.
-
- @type x: C{string}
- @param x: the data to decode
- @type f: C{int}
- @param f: the offset in the data to start at
- @rtype: C{list}, C{int}
- @return: the bdecoded list, and the offset to read next
-
- """
-
- r, f = [], f+1
- while x[f] != 'e':
- v, f = decode_func[x[f]](x, f)
- r.append(v)
- return (r, f + 1)
-
-def decode_dict(x, f):
- """Bdecode a dictionary.
-
- @type x: C{string}
- @param x: the data to decode
- @type f: C{int}
- @param f: the offset in the data to start at
- @rtype: C{dictionary}, C{int}
- @return: the bdecoded dictionary, and the offset to read next
- @raise BencodeError: if the data is improperly encoded
-
- """
-
- r, f = {}, f+1
- lastkey = None
- while x[f] != 'e':
- k, f = decode_string(x, f)
- if lastkey >= k:
- raise BencodeError, "dictionary keys must be in sorted order"
- lastkey = k
- r[k], f = decode_func[x[f]](x, f)
- return (r, f + 1)
-
-decode_func = {}
-decode_func['l'] = decode_list
-decode_func['d'] = decode_dict
-decode_func['i'] = decode_int
-decode_func['0'] = decode_string
-decode_func['1'] = decode_string
-decode_func['2'] = decode_string
-decode_func['3'] = decode_string
-decode_func['4'] = decode_string
-decode_func['5'] = decode_string
-decode_func['6'] = decode_string
-decode_func['7'] = decode_string
-decode_func['8'] = decode_string
-decode_func['9'] = decode_string
-decode_func['u'] = decode_unicode
-decode_func['t'] = decode_datetime
-
-def bdecode(x, sloppy = False):
- """Bdecode a string of data.
-
- @type x: C{string}
- @param x: the data to decode
- @type sloppy: C{boolean}
- @param sloppy: whether to allow errors in the decoding
- @rtype: unknown
- @return: the bdecoded data
- @raise BencodeError: if the data is improperly encoded
-
- """
-
- try:
- r, l = decode_func[x[0]](x, 0)
-# except (IndexError, KeyError):
- except (IndexError, KeyError, ValueError):
- raise BencodeError, "bad bencoded data"
- if not sloppy and l != len(x):
- raise BencodeError, "bad bencoded data, all could not be decoded"
- return r
-
-bencached_marker = []
-
-class Bencached(object):
- """Dummy data structure for storing bencoded data in memory.
-
- @type marker: C{list}
- @ivar marker: mutable type to make sure the data was encoded by this class
- @type bencoded: C{string}
- @ivar bencoded: the bencoded data stored in a string
-
- """
-
- def __init__(self, s):
- """
-
- @type s: C{string}
- @param s: the new bencoded data to store
-
- """
-
- self.marker = bencached_marker
- self.bencoded = s
-
-BencachedType = type(Bencached('')) # insufficient, but good as a filter
-
-def encode_bencached(x,r):
- """Bencode L{Bencached} data.
-
- @type x: L{Bencached}
- @param x: the data to encode
- @type r: C{list}
- @param r: the currently bencoded data, to which the bencoding of x
- will be appended
-
- """
-
- assert x.marker == bencached_marker
- r.append(x.bencoded)
-
-def encode_int(x,r):
- """Bencode an integer.
-
- @type x: C{int}
- @param x: the data to encode
- @type r: C{list}
- @param r: the currently bencoded data, to which the bencoding of x
- will be appended
-
- """
-
- r.extend(('i',str(x),'e'))
-
-def encode_bool(x,r):
- """Bencode a boolean.
-
- @type x: C{boolean}
- @param x: the data to encode
- @type r: C{list}
- @param r: the currently bencoded data, to which the bencoding of x
- will be appended
-
- """
-
- encode_int(int(x),r)
-
-def encode_string(x,r):
- """Bencode a string.
-
- @type x: C{string}
- @param x: the data to encode
- @type r: C{list}
- @param r: the currently bencoded data, to which the bencoding of x
- will be appended
-
- """
-
- r.extend((str(len(x)),':',x))
-
-def encode_unicode(x,r):
- """Bencode a unicode string.
-
- @type x: C{unicode}
- @param x: the data to encode
- @type r: C{list}
- @param r: the currently bencoded data, to which the bencoding of x
- will be appended
-
- """
-
- #r.append('u')
- encode_string(x.encode('UTF-8'),r)
-
-def encode_datetime(x,r):
- """Bencode a datetime value in UTC.
-
- If the datetime object has time zone info, it is converted to UTC time.
- Otherwise it is assumed that the time is already in UTC time.
- Microseconds are removed.
-
- @type x: C{datetime.datetime}
- @param x: the data to encode
- @type r: C{list}
- @param r: the currently bencoded data, to which the bencoding of x
- will be appended
-
- """
-
- date = x.replace(microsecond = 0)
- offset = date.utcoffset()
- if offset is not None:
- utcdate = date.replace(tzinfo = None) + offset
- else:
- utcdate = date
- r.extend(('t',utcdate.isoformat(),'e'))
-
-def encode_list(x,r):
- """Bencode a list.
-
- @type x: C{list}
- @param x: the data to encode
- @type r: C{list}
- @param r: the currently bencoded data, to which the bencoding of x
- will be appended
-
- """
-
- r.append('l')
- for e in x:
- encode_func[type(e)](e, r)
- r.append('e')
-
-def encode_dict(x,r):
- """Bencode a dictionary.
-
- @type x: C{dictionary}
- @param x: the data to encode
- @type r: C{list}
- @param r: the currently bencoded data, to which the bencoding of x
- will be appended
-
- """
-
- r.append('d')
- ilist = x.items()
- ilist.sort()
- for k,v in ilist:
- r.extend((str(len(k)),':',k))
- encode_func[type(v)](v, r)
- r.append('e')
-
-encode_func = {}
-encode_func[BencachedType] = encode_bencached
-encode_func[IntType] = encode_int
-encode_func[LongType] = encode_int
-encode_func[StringType] = encode_string
-encode_func[ListType] = encode_list
-encode_func[TupleType] = encode_list
-encode_func[DictType] = encode_dict
-encode_func[BooleanType] = encode_bool
-encode_func[datetime] = encode_datetime
-if UnicodeType:
- encode_func[UnicodeType] = encode_unicode
-
-def bencode(x):
- """Bencode some data.
-
- @type x: unknown
- @param x: the data to encode
- @rtype: string
- @return: the bencoded data
- @raise BencodeError: if the data contains a type that cannot be encoded
-
- """
- r = []
- try:
- encode_func[type(x)](x, r)
- except:
- raise BencodeError, "failed to bencode the data"
- return ''.join(r)
-
-class TestBencode(unittest.TestCase):
- """Test the bencoding and bdecoding of data."""
-
- timeout = 2
-
- def test_bdecode_string(self):
- self.failUnlessRaises(BencodeError, bdecode, '0:0:')
- self.failUnlessRaises(BencodeError, bdecode, '')
- self.failUnlessRaises(BencodeError, bdecode, '35208734823ljdahflajhdf')
- self.failUnlessRaises(BencodeError, bdecode, '2:abfdjslhfld')
- self.failUnlessEqual(bdecode('0:'), '')
- self.failUnlessEqual(bdecode('3:abc'), 'abc')
- self.failUnlessEqual(bdecode('10:1234567890'), '1234567890')
- self.failUnlessRaises(BencodeError, bdecode, '02:xy')
- self.failUnlessRaises(BencodeError, bdecode, '9999:x')
-
- def test_bdecode_int(self):
- self.failUnlessRaises(BencodeError, bdecode, 'ie')
- self.failUnlessRaises(BencodeError, bdecode, 'i341foo382e')
- self.failUnlessEqual(bdecode('i4e'), 4L)
- self.failUnlessEqual(bdecode('i0e'), 0L)
- self.failUnlessEqual(bdecode('i123456789e'), 123456789L)
- self.failUnlessEqual(bdecode('i-10e'), -10L)
- self.failUnlessRaises(BencodeError, bdecode, 'i-0e')
- self.failUnlessRaises(BencodeError, bdecode, 'i123')
- self.failUnlessRaises(BencodeError, bdecode, 'i6easd')
- self.failUnlessRaises(BencodeError, bdecode, 'i03e')
-
- def test_bdecode_list(self):
- self.failUnlessRaises(BencodeError, bdecode, 'l')
- self.failUnlessEqual(bdecode('le'), [])
- self.failUnlessRaises(BencodeError, bdecode, 'leanfdldjfh')
- self.failUnlessEqual(bdecode('l0:0:0:e'), ['', '', ''])
- self.failUnlessRaises(BencodeError, bdecode, 'relwjhrlewjh')
- self.failUnlessEqual(bdecode('li1ei2ei3ee'), [1, 2, 3])
- self.failUnlessEqual(bdecode('l3:asd2:xye'), ['asd', 'xy'])
- self.failUnlessEqual(bdecode('ll5:Alice3:Bobeli2ei3eee'), [['Alice', 'Bob'], [2, 3]])
- self.failUnlessRaises(BencodeError, bdecode, 'l01:ae')
- self.failUnlessRaises(BencodeError, bdecode, 'l0:')
-
- def test_bdecode_dict(self):
- self.failUnlessRaises(BencodeError, bdecode, 'd')
- self.failUnlessRaises(BencodeError, bdecode, 'defoobar')
- self.failUnlessEqual(bdecode('de'), {})
- self.failUnlessEqual(bdecode('d3:agei25e4:eyes4:bluee'), {'age': 25, 'eyes': 'blue'})
- self.failUnlessEqual(bdecode('d8:spam.mp3d6:author5:Alice6:lengthi100000eee'),
- {'spam.mp3': {'author': 'Alice', 'length': 100000}})
- self.failUnlessRaises(BencodeError, bdecode, 'd3:fooe')
- self.failUnlessRaises(BencodeError, bdecode, 'di1e0:e')
- self.failUnlessRaises(BencodeError, bdecode, 'd1:b0:1:a0:e')
- self.failUnlessRaises(BencodeError, bdecode, 'd1:a0:1:a0:e')
- self.failUnlessRaises(BencodeError, bdecode, 'd0:0:')
- self.failUnlessRaises(BencodeError, bdecode, 'd0:')
-
- def test_bdecode_unicode(self):
- self.failUnlessRaises(BencodeError, bdecode, 'u0:0:')
- self.failUnlessRaises(BencodeError, bdecode, 'u')
- self.failUnlessRaises(BencodeError, bdecode, 'u35208734823ljdahflajhdf')
- self.failUnlessRaises(BencodeError, bdecode, 'u2:abfdjslhfld')
- self.failUnlessEqual(bdecode('u0:'), '')
- self.failUnlessEqual(bdecode('u3:abc'), 'abc')
- self.failUnlessEqual(bdecode('u10:1234567890'), '1234567890')
- self.failUnlessRaises(BencodeError, bdecode, 'u02:xy')
- self.failUnlessRaises(BencodeError, bdecode, 'u9999:x')
-
- def test_bencode_int(self):
- self.failUnlessEqual(bencode(4), 'i4e')
- self.failUnlessEqual(bencode(0), 'i0e')
- self.failUnlessEqual(bencode(-10), 'i-10e')
- self.failUnlessEqual(bencode(12345678901234567890L), 'i12345678901234567890e')
-
- def test_bencode_string(self):
- self.failUnlessEqual(bencode(''), '0:')
- self.failUnlessEqual(bencode('abc'), '3:abc')
- self.failUnlessEqual(bencode('1234567890'), '10:1234567890')
-
- def test_bencode_list(self):
- self.failUnlessEqual(bencode([]), 'le')
- self.failUnlessEqual(bencode([1, 2, 3]), 'li1ei2ei3ee')
- self.failUnlessEqual(bencode([['Alice', 'Bob'], [2, 3]]), 'll5:Alice3:Bobeli2ei3eee')
-
- def test_bencode_dict(self):
- self.failUnlessEqual(bencode({}), 'de')
- self.failUnlessEqual(bencode({'age': 25, 'eyes': 'blue'}), 'd3:agei25e4:eyes4:bluee')
- self.failUnlessEqual(bencode({'spam.mp3': {'author': 'Alice', 'length': 100000}}),
- 'd8:spam.mp3d6:author5:Alice6:lengthi100000eee')
- self.failUnlessRaises(BencodeError, bencode, {1: 'foo'})
-
- def test_bencode_unicode(self):
- self.failUnlessEqual(bencode(u''), '0:')
- self.failUnlessEqual(bencode(u'abc'), '3:abc')
- self.failUnlessEqual(bencode(u'1234567890'), '10:1234567890')
-
- def test_bool(self):
- self.failUnless(bdecode(bencode(True)))
- self.failIf(bdecode(bencode(False)))
-
- def test_datetime(self):
- date = datetime.utcnow()
- self.failUnlessEqual(bdecode(bencode(date)), date.replace(microsecond = 0))
-
- if UnicodeType is None:
- test_bencode_unicode.skip = "Python was not compiled with unicode support"
- test_bdecode_unicode.skip = "Python was not compiled with unicode support"
+++ /dev/null
-
-"""An sqlite database for storing nodes and key/value pairs."""
-
-from datetime import datetime, timedelta
-from pysqlite2 import dbapi2 as sqlite
-from binascii import a2b_base64, b2a_base64
-from time import sleep
-import os
-
-from twisted.trial import unittest
-
-class DBExcept(Exception):
- pass
-
-class khash(str):
- """Dummy class to convert all hashes to base64 for storing in the DB."""
-
-class dht_value(str):
- """Dummy class to convert all DHT values to base64 for storing in the DB."""
-
-# Initialize the database to work with 'khash' objects (binary strings)
-sqlite.register_adapter(khash, b2a_base64)
-sqlite.register_converter("KHASH", a2b_base64)
-sqlite.register_converter("khash", a2b_base64)
-
-# Initialize the database to work with DHT values (binary strings)
-sqlite.register_adapter(dht_value, b2a_base64)
-sqlite.register_converter("DHT_VALUE", a2b_base64)
-sqlite.register_converter("dht_value", a2b_base64)
-
-class DB:
- """An sqlite database for storing persistent node info and key/value pairs.
-
- @type db: C{string}
- @ivar db: the database file to use
- @type conn: L{pysqlite2.dbapi2.Connection}
- @ivar conn: an open connection to the sqlite database
- """
-
- def __init__(self, db):
- """Load or create the database file.
-
- @type db: C{string}
- @param db: the database file to use
- """
- self.db = db
- try:
- os.stat(db)
- except OSError:
- self._createNewDB(db)
- else:
- self._loadDB(db)
- if sqlite.version_info < (2, 1):
- sqlite.register_converter("TEXT", str)
- sqlite.register_converter("text", str)
- else:
- self.conn.text_factory = str
-
- #{ Loading the DB
- def _loadDB(self, db):
- """Open a new connection to the existing database file"""
- try:
- self.conn = sqlite.connect(database=db, detect_types=sqlite.PARSE_DECLTYPES)
- except:
- import traceback
- raise DBExcept, "Couldn't open DB", traceback.format_exc()
-
- def _createNewDB(self, db):
- """Open a connection to a new database and create the necessary tables."""
- self.conn = sqlite.connect(database=db, detect_types=sqlite.PARSE_DECLTYPES)
- c = self.conn.cursor()
- c.execute("CREATE TABLE kv (key KHASH, value DHT_VALUE, last_refresh TIMESTAMP, "+
- "PRIMARY KEY (key, value))")
- c.execute("CREATE INDEX kv_key ON kv(key)")
- c.execute("CREATE INDEX kv_last_refresh ON kv(last_refresh)")
- c.execute("CREATE TABLE nodes (id KHASH PRIMARY KEY, host TEXT, port NUMBER)")
- c.execute("CREATE TABLE self (num NUMBER PRIMARY KEY, id KHASH)")
- self.conn.commit()
-
- def close(self):
- self.conn.close()
-
- #{ This node's ID
- def getSelfNode(self):
- """Retrieve this node's ID from a previous run of the program."""
- c = self.conn.cursor()
- c.execute('SELECT id FROM self WHERE num = 0')
- id = c.fetchone()
- if id:
- return id[0]
- else:
- return None
-
- def saveSelfNode(self, id):
- """Store this node's ID for a subsequent run of the program."""
- c = self.conn.cursor()
- c.execute("INSERT OR REPLACE INTO self VALUES (0, ?)", (khash(id),))
- self.conn.commit()
-
- #{ Routing table
- def dumpRoutingTable(self, buckets):
- """Save routing table nodes to the database."""
- c = self.conn.cursor()
- c.execute("DELETE FROM nodes WHERE id NOT NULL")
- for bucket in buckets:
- for node in bucket.l:
- c.execute("INSERT INTO nodes VALUES (?, ?, ?)", (khash(node.id), node.host, node.port))
- self.conn.commit()
-
- def getRoutingTable(self):
- """Load routing table nodes from database."""
- c = self.conn.cursor()
- c.execute("SELECT * FROM nodes")
- return c.fetchall()
-
- #{ Key/value pairs
- def retrieveValues(self, key):
- """Retrieve values from the database."""
- c = self.conn.cursor()
- c.execute("SELECT value FROM kv WHERE key = ?", (khash(key),))
- l = []
- rows = c.fetchall()
- for row in rows:
- l.append(row[0])
- return l
-
- def countValues(self, key):
- """Count the number of values in the database."""
- c = self.conn.cursor()
- c.execute("SELECT COUNT(value) as num_values FROM kv WHERE key = ?", (khash(key),))
- res = 0
- row = c.fetchone()
- if row:
- res = row[0]
- return res
-
- def storeValue(self, key, value):
- """Store or update a key and value."""
- c = self.conn.cursor()
- c.execute("INSERT OR REPLACE INTO kv VALUES (?, ?, ?)",
- (khash(key), dht_value(value), datetime.now()))
- self.conn.commit()
-
- def expireValues(self, expireAfter):
- """Expire older values after expireAfter seconds."""
- t = datetime.now() - timedelta(seconds=expireAfter)
- c = self.conn.cursor()
- c.execute("DELETE FROM kv WHERE last_refresh < ?", (t, ))
- self.conn.commit()
-
-class TestDB(unittest.TestCase):
- """Tests for the khashmir database."""
-
- timeout = 5
- db = '/tmp/khashmir.db'
- key = '\xca\xec\xb8\x0c\x00\xe7\x07\xf8~])\x8f\x9d\xe5_B\xff\x1a\xc4!'
-
- def setUp(self):
- self.store = DB(self.db)
-
- def test_selfNode(self):
- self.store.saveSelfNode(self.key)
- self.failUnlessEqual(self.store.getSelfNode(), self.key)
-
- def test_Value(self):
- self.store.storeValue(self.key, self.key)
- val = self.store.retrieveValues(self.key)
- self.failUnlessEqual(len(val), 1)
- self.failUnlessEqual(val[0], self.key)
-
- def test_expireValues(self):
- self.store.storeValue(self.key, self.key)
- sleep(2)
- self.store.storeValue(self.key, self.key+self.key)
- self.store.expireValues(1)
- val = self.store.retrieveValues(self.key)
- self.failUnlessEqual(len(val), 1)
- self.failUnlessEqual(val[0], self.key+self.key)
-
- def test_RoutingTable(self):
- class dummy:
- id = self.key
- host = "127.0.0.1"
- port = 9977
- def contents(self):
- return (self.id, self.host, self.port)
- dummy2 = dummy()
- dummy2.id = '\xaa\xbb\xcc\x0c\x00\xe7\x07\xf8~])\x8f\x9d\xe5_B\xff\x1a\xc4!'
- dummy2.host = '205.23.67.124'
- dummy2.port = 12345
- class bl:
- def __init__(self):
- self.l = []
- bl1 = bl()
- bl1.l.append(dummy())
- bl2 = bl()
- bl2.l.append(dummy2)
- buckets = [bl1, bl2]
- self.store.dumpRoutingTable(buckets)
- rt = self.store.getRoutingTable()
- self.failUnlessIn(dummy().contents(), rt)
- self.failUnlessIn(dummy2.contents(), rt)
-
- def tearDown(self):
- self.store.close()
- os.unlink(self.db)
+++ /dev/null
-## Copyright 2002-2003 Andrew Loewenstern, All Rights Reserved
-# see LICENSE.txt for license information
-
-"""Functions to deal with hashes (node IDs and keys)."""
-
-from sha import sha
-from os import urandom
-
-from twisted.trial import unittest
-
-def intify(hstr):
- """Convert a hash (big-endian) to a long python integer."""
- assert len(hstr) == 20
- return long(hstr.encode('hex'), 16)
-
-def stringify(num):
- """Convert a long python integer to a hash."""
- str = hex(num)[2:]
- if str[-1] == 'L':
- str = str[:-1]
- if len(str) % 2 != 0:
- str = '0' + str
- str = str.decode('hex')
- return (20 - len(str)) *'\x00' + str
-
-def distance(a, b):
- """Calculate the distance between two hashes expressed as strings."""
- return intify(a) ^ intify(b)
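-
-# A quick worked example (these agree with the tests below):
-#   distance('\x00' * 20, '\xff' * 20) == 2**160 - 1
-#   distance(h, h) == 0 for any hash h, since x ^ x == 0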
-
-def newID():
- """Get a new pseudorandom globally unique hash string."""
- h = sha()
- h.update(urandom(20))
- return h.digest()
-
-def newIDInRange(min, max):
- """Get a new pseudorandom globally unique hash string in the range."""
- return stringify(randRange(min,max))
-
-def randRange(min, max):
- """Get a new pseudorandom globally unique hash number in the range."""
- return min + intify(newID()) % (max - min)
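-
-# Note: randRange's modulo makes the result only approximately uniform
-# when (max - min) does not evenly divide 2**160, which is close enough
-# for choosing bucket-refresh targets.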
-
-def newTID():
- """Get a new pseudorandom transaction ID number."""
- return randRange(-2**30, 2**30)
-
-class TestNewID(unittest.TestCase):
- """Test the newID function."""
- def testLength(self):
- self.failUnlessEqual(len(newID()), 20)
- def testHundreds(self):
- for x in xrange(100):
- self.testLength()
-
-class TestIntify(unittest.TestCase):
- """Test the intify function."""
- known = [('\0' * 20, 0),
- ('\xff' * 20, 2L**160 - 1),
- ]
- def testKnown(self):
- for str, value in self.known:
- self.failUnlessEqual(intify(str), value)
- def testEndiannessOnce(self):
- h = newID()
- while h[-1] == '\xff':
- h = newID()
- k = h[:-1] + chr(ord(h[-1]) + 1)
- self.failUnlessEqual(intify(k) - intify(h), 1)
- def testEndiannessLots(self):
- for x in xrange(100):
- self.testEndiannessOnce()
-
-class TestDistance(unittest.TestCase):
- """Test the distance function."""
- known = [
- (("\0" * 20, "\xff" * 20), 2**160L -1),
- ((sha("foo").digest(), sha("foo").digest()), 0),
- ((sha("bar").digest(), sha("bar").digest()), 0)
- ]
- def testKnown(self):
- for pair, dist in self.known:
- self.failUnlessEqual(distance(pair[0], pair[1]), dist)
- def testCommutative(self):
- for i in xrange(100):
- x, y, z = newID(), newID(), newID()
- self.failUnlessEqual(distance(x,y) ^ distance(y, z), distance(x, z))
-
-class TestRandRange(unittest.TestCase):
- """Test the randRange function."""
- def testOnce(self):
- a = intify(newID())
- b = intify(newID())
- if a < b:
- c = randRange(a, b)
- self.failUnlessEqual(a <= c < b, True, "output out of range %d %d %d" % (b, c, a))
- else:
- c = randRange(b, a)
- self.failUnlessEqual(b <= c < a, True, "output out of range %d %d %d" % (b, c, a))
-
- def testOneHundredTimes(self):
- for i in xrange(100):
- self.testOnce()
+++ /dev/null
-## Copyright 2002-2004 Andrew Loewenstern, All Rights Reserved
-# see LICENSE.txt for license information
-
-"""The main Khashmir program."""
-
-import warnings
-warnings.simplefilter("ignore", DeprecationWarning)
-
-from datetime import datetime, timedelta
-from random import randrange, shuffle
-from sha import sha
-import os
-
-from twisted.internet.defer import Deferred
-from twisted.internet import protocol, reactor
-from twisted.trial import unittest
-
-from db import DB
-from ktable import KTable
-from knode import KNodeBase, KNodeRead, KNodeWrite, NULL_ID
-from khash import newID, newIDInRange
-from actions import FindNode, FindValue, GetValue, StoreValue
-import krpc
-
-class KhashmirBase(protocol.Factory):
- """The base Khashmir class, with base functionality and find node, no key-value mappings.
-
- @type _Node: L{node.Node}
- @ivar _Node: the knode implementation to use for this class of DHT
- @type config: C{dictionary}
- @ivar config: the configuration parameters for the DHT
- @type port: C{int}
- @ivar port: the port to listen on
- @type store: L{db.DB}
- @ivar store: the database to store nodes and key/value pairs in
- @type node: L{node.Node}
- @ivar node: this node
- @type table: L{ktable.KTable}
- @ivar table: the routing table
- @type token_secrets: C{list} of C{string}
- @ivar token_secrets: the current secrets to use to create tokens
- @type udp: L{krpc.hostbroker}
- @ivar udp: the factory for the KRPC protocol
- @type listenport: L{twisted.internet.interfaces.IListeningPort}
- @ivar listenport: the UDP listening port
- @type next_checkpoint: L{twisted.internet.interfaces.IDelayedCall}
- @ivar next_checkpoint: the delayed call for the next checkpoint
- """
-
- _Node = KNodeBase
-
- def __init__(self, config, cache_dir='/tmp'):
- """Initialize the Khashmir class and call the L{setup} method.
-
- @type config: C{dictionary}
- @param config: the configuration parameters for the DHT
- @type cache_dir: C{string}
- @param cache_dir: the directory to store all files in
- (optional, defaults to the /tmp directory)
- """
- self.config = None
- self.setup(config, cache_dir)
-
- def setup(self, config, cache_dir):
- """Setup all the Khashmir sub-modules.
-
- @type config: C{dictionary}
- @param config: the configuration parameters for the DHT
- @type cache_dir: C{string}
- @param cache_dir: the directory to store all files in
- """
- self.config = config
- self.port = config['PORT']
- self.store = DB(os.path.join(cache_dir, 'khashmir.' + str(self.port) + '.db'))
- self.node = self._loadSelfNode('', self.port)
- self.table = KTable(self.node, config)
- self.token_secrets = [newID()]
-
- # Start listening
- self.udp = krpc.hostbroker(self, config)
- self.udp.protocol = krpc.KRPC
- self.listenport = reactor.listenUDP(self.port, self.udp)
-
- # Load the routing table and begin checkpointing
- self._loadRoutingTable()
- self.refreshTable(force = True)
- self.next_checkpoint = reactor.callLater(60, self.checkpoint)
-
- def Node(self, id, host = None, port = None):
- """Create a new node.
-
- @see: L{node.Node.__init__}
- """
- n = self._Node(id, host, port)
- n.table = self.table
- n.conn = self.udp.connectionForAddr((n.host, n.port))
- return n
-
- def __del__(self):
- """Stop listening for packets."""
- self.listenport.stopListening()
-
- def _loadSelfNode(self, host, port):
- """Create this node, loading any previously saved one."""
- id = self.store.getSelfNode()
- if not id:
- id = newID()
- return self._Node(id, host, port)
-
- def checkpoint(self):
- """Perform some periodic maintenance operations."""
- # Create a new token secret
- self.token_secrets.insert(0, newID())
- if len(self.token_secrets) > 3:
- self.token_secrets.pop()
-
- # Save some parameters for reloading
- self.store.saveSelfNode(self.node.id)
- self.store.dumpRoutingTable(self.table.buckets)
-
- # DHT maintenance
- self.store.expireValues(self.config['KEY_EXPIRE'])
- self.refreshTable()
-
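- # Jitter the interval by +/-10% so that many nodes started at the
- # same time do not all run their checkpoints in lockstep.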
- self.next_checkpoint = reactor.callLater(randrange(int(self.config['CHECKPOINT_INTERVAL'] * .9),
- int(self.config['CHECKPOINT_INTERVAL'] * 1.1)),
- self.checkpoint)
-
- def _loadRoutingTable(self):
- """Load the previous routing table nodes from the database.
-
- It's usually a good idea to call refreshTable(force = True) after
- loading the table.
- """
- nodes = self.store.getRoutingTable()
- for rec in nodes:
- n = self.Node(rec[0], rec[1], int(rec[2]))
- self.table.insertNode(n, contacted = False)
-
- #{ Local interface
- def addContact(self, host, port, callback=None, errback=None):
- """Ping this node and add the contact info to the table on pong.
-
- @type host: C{string}
- @param host: the IP address of the node to contact
- @type port: C{int}
- @param port: the port of the node to contact
- @type callback: C{method}
- @param callback: the method to call with the results, it must take 1
- parameter, the contact info returned by the node
- (optional, defaults to doing nothing with the results)
- @type errback: C{method}
- @param errback: the method to call if an error occurs
- (optional, defaults to calling the callback with None)
- """
- n = self.Node(NULL_ID, host, port)
- self.sendJoin(n, callback=callback, errback=errback)
-
- def findNode(self, id, callback, errback=None):
- """Find the contact info for the K closest nodes in the global table.
-
- @type id: C{string}
- @param id: the target ID to find the K closest nodes of
- @type callback: C{method}
- @param callback: the method to call with the results, it must take 1
- parameter, the list of K closest nodes
- @type errback: C{method}
- @param errback: the method to call if an error occurs
- (optional, defaults to doing nothing when an error occurs)
- """
- # Get K nodes out of local table/cache
- nodes = self.table.findNodes(id)
- d = Deferred()
- if errback:
- d.addCallbacks(callback, errback)
- else:
- d.addCallback(callback)
-
- # If the target ID was found
- if len(nodes) == 1 and nodes[0].id == id:
- d.callback(nodes)
- else:
- # Start the finding nodes action
- state = FindNode(self, id, d.callback, self.config)
- reactor.callLater(0, state.goWithNodes, nodes)
-
- def insertNode(self, node, contacted = True):
- """Try to insert a node in our local table, pinging oldest contact if necessary.
-
- If all you have is a host/port, then use L{addContact}, which calls this
- method after receiving the PONG from the remote node. The reason for
- the seperation is we can't insert a node into the table without its
- node ID. That means of course the node passed into this method needs
- to be a properly formed Node object with a valid ID.
-
- @type node: L{node.Node}
- @param node: the new node to try and insert
- @type contacted: C{boolean}
- @param contacted: whether the new node is known to be good, i.e.
- responded to a request (optional, defaults to True)
- """
- old = self.table.insertNode(node, contacted=contacted)
- if (old and old.id != self.node.id and
- (datetime.now() - old.lastSeen) >
- timedelta(seconds=self.config['MIN_PING_INTERVAL'])):
-
- def _staleNodeHandler(oldnode = old, newnode = node):
- """The pinged node never responded, so replace it."""
- self.table.replaceStaleNode(oldnode, newnode)
-
- def _notStaleNodeHandler(dict, old=old):
- """Got a pong from the old node, so update it."""
- dict = dict['rsp']
- if dict['id'] == old.id:
- self.table.justSeenNode(old.id)
-
- # Bucket is full, check to see if old node is still available
- df = old.ping(self.node.id)
- df.addCallbacks(_notStaleNodeHandler, _staleNodeHandler)
-
- def sendJoin(self, node, callback=None, errback=None):
- """Join the DHT by pinging a bootstrap node.
-
- @type node: L{node.Node}
- @param node: the node to send the join to
- @type callback: C{method}
- @param callback: the method to call with the results, it must take 1
- parameter, the contact info returned by the node
- (optional, defaults to doing nothing with the results)
- @type errback: C{method}
- @param errback: the method to call if an error occurs
- (optional, defaults to calling the callback with None)
- """
-
- def _pongHandler(dict, node=node, self=self, callback=callback):
- """Node responded properly, callback with response."""
- n = self.Node(dict['rsp']['id'], dict['_krpc_sender'][0], dict['_krpc_sender'][1])
- self.insertNode(n)
- if callback:
- callback((dict['rsp']['ip_addr'], dict['rsp']['port']))
-
- def _defaultPong(err, node=node, table=self.table, callback=callback, errback=errback):
- """Error occurred, fail node and errback or callback with error."""
- table.nodeFailed(node)
- if errback:
- errback()
- elif callback:
- callback(None)
-
- df = node.join(self.node.id)
- df.addCallbacks(_pongHandler, _defaultPong)
-
- def findCloseNodes(self, callback=lambda a: None, errback = None):
- """Perform a findNode on the ID one away from our own.
-
- This will allow us to populate our table with nodes on our network
- closest to our own. This is called as soon as we start up with an
- empty table.
-
- @type callback: C{method}
- @param callback: the method to call with the results, it must take 1
- parameter, the list of K closest nodes
- (optional, defaults to doing nothing with the results)
- @type errback: C{method}
- @param errback: the method to call if an error occurs
- (optional, defaults to doing nothing when an error occurs)
- """
- id = self.node.id[:-1] + chr((ord(self.node.id[-1]) + 1) % 256)
- self.findNode(id, callback, errback)
-
- def refreshTable(self, force = False):
- """Check all the buckets for those that need refreshing.
-
- @param force: refresh all buckets regardless of last bucket access time
- (optional, defaults to False)
- """
- def callback(nodes):
- pass
-
- for bucket in self.table.buckets:
- if force or (datetime.now() - bucket.lastAccessed >
- timedelta(seconds=self.config['BUCKET_STALENESS'])):
- # Choose a random ID in the bucket and try and find it
- id = newIDInRange(bucket.min, bucket.max)
- self.findNode(id, callback)
-
- def stats(self):
- """Collect some statistics about the DHT.
-
- @rtype: (C{int}, C{int})
- @return: the number of contacts in our routing table, and the estimated
- number of nodes in the entire DHT
- """
- num_contacts = reduce(lambda a, b: a + len(b.l), self.table.buckets, 0)
- num_nodes = self.config['K'] * (2**(len(self.table.buckets) - 1))
- return (num_contacts, num_nodes)
-
- def shutdown(self):
- """Closes the port and cancels pending later calls."""
- self.listenport.stopListening()
- try:
- self.next_checkpoint.cancel()
- except:
- pass
- self.store.close()
-
- #{ Remote interface
- def krpc_ping(self, id, _krpc_sender):
- """Pong with our ID.
-
- @type id: C{string}
- @param id: the node ID of the sender node
- @type _krpc_sender: (C{string}, C{int})
- @param _krpc_sender: the sender node's IP address and port
- """
- n = self.Node(id, _krpc_sender[0], _krpc_sender[1])
- self.insertNode(n, contacted = False)
-
- return {"id" : self.node.id}
-
- def krpc_join(self, id, _krpc_sender):
- """Add the node by responding with its address and port.
-
- @type id: C{string}
- @param id: the node ID of the sender node
- @type _krpc_sender: (C{string}, C{int})
- @param _krpc_sender: the sender node's IP address and port
- """
- n = self.Node(id, _krpc_sender[0], _krpc_sender[1])
- self.insertNode(n, contacted = False)
-
- return {"ip_addr" : _krpc_sender[0], "port" : _krpc_sender[1], "id" : self.node.id}
-
- def krpc_find_node(self, target, id, _krpc_sender):
- """Find the K closest nodes to the target in the local routing table.
-
- @type target: C{string}
- @param target: the target ID to find nodes for
- @type id: C{string}
- @param id: the node ID of the sender node
- @type _krpc_sender: (C{string}, C{int})
- @param _krpc_sender: the sender node's IP address and port
- """
- n = self.Node(id, _krpc_sender[0], _krpc_sender[1])
- self.insertNode(n, contacted = False)
-
- nodes = self.table.findNodes(target)
- nodes = map(lambda node: node.contactInfo(), nodes)
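- # The returned token ties a later store_value to this sender's
- # address: it is the SHA1 of the newest rotating secret plus the
- # sender's IP, and is re-derived for checking in krpc_store_value.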
- token = sha(self.token_secrets[0] + _krpc_sender[0]).digest()
- return {"nodes" : nodes, "token" : token, "id" : self.node.id}
-
-
-class KhashmirRead(KhashmirBase):
- """The read-only Khashmir class, which can only retrieve (not store) key/value mappings."""
-
- _Node = KNodeRead
-
- #{ Local interface
- def findValue(self, key, callback, errback=None):
- """Get the nodes that have values for the key from the global table.
-
- @type key: C{string}
- @param key: the target key to find the values for
- @type callback: C{method}
- @param callback: the method to call with the results, it must take 1
- parameter, the list of nodes with values
- @type errback: C{method}
- @param errback: the method to call if an error occurs
- (optional, defaults to doing nothing when an error occurs)
- """
- # Get K nodes out of local table/cache
- nodes = self.table.findNodes(key)
- d = Deferred()
- if errback:
- d.addCallbacks(callback, errback)
- else:
- d.addCallback(callback)
-
- # Search for others starting with the locally found ones
- state = FindValue(self, key, d.callback, self.config)
- reactor.callLater(0, state.goWithNodes, nodes)
-
- def valueForKey(self, key, callback, searchlocal = True):
- """Get the values found for key in global table.
-
- Callback will be called with a list of values for each peer that
- returns unique values. The final callback will be an empty list.
-
- @type key: C{string}
- @param key: the target key to get the values for
- @type callback: C{method}
- @param callback: the method to call with the results, it must take 2
- parameters: the key, and the values found
- @type searchlocal: C{boolean}
- @param searchlocal: whether to also look for any local values
- """
- # Get any local values
- if searchlocal:
- l = self.store.retrieveValues(key)
- if len(l) > 0:
- reactor.callLater(0, callback, key, l)
- else:
- l = []
-
- def _getValueForKey(nodes, key=key, local_values=l, response=callback, self=self):
- """Use the found nodes to send requests for values to."""
- state = GetValue(self, key, local_values, self.config['RETRIEVE_VALUES'], response, self.config)
- reactor.callLater(0, state.goWithNodes, nodes)
-
- # First lookup nodes that have values for the key
- self.findValue(key, _getValueForKey)
-
- #{ Remote interface
- def krpc_find_value(self, key, id, _krpc_sender):
- """Find the number of values stored locally for the key, and the K closest nodes.
-
- @type key: C{string}
- @param key: the target key to find the values and nodes for
- @type id: C{string}
- @param id: the node ID of the sender node
- @type _krpc_sender: (C{string}, C{int})
- @param _krpc_sender: the sender node's IP address and port
- """
- n = self.Node(id, _krpc_sender[0], _krpc_sender[1])
- self.insertNode(n, contacted = False)
-
- nodes = self.table.findNodes(key)
- nodes = map(lambda node: node.contactInfo(), nodes)
- num_values = self.store.countValues(key)
- return {'nodes' : nodes, 'num' : num_values, "id": self.node.id}
-
- def krpc_get_value(self, key, num, id, _krpc_sender):
- """Retrieve the values stored locally for the key.
-
- @type key: C{string}
- @param key: the target key to retrieve the values for
- @type num: C{int}
- @param num: the maximum number of values to retrieve, or 0 to
- retrieve all of them
- @type id: C{string}
- @param id: the node ID of the sender node
- @type _krpc_sender: (C{string}, C{int})
- @param _krpc_sender: the sender node's IP address and port
- """
- n = self.Node(id, _krpc_sender[0], _krpc_sender[1])
- self.insertNode(n, contacted = False)
-
- l = self.store.retrieveValues(key)
- if num == 0 or num >= len(l):
- return {'values' : l, "id": self.node.id}
- else:
- shuffle(l)
- return {'values' : l[:num], "id": self.node.id}
-
-
-class KhashmirWrite(KhashmirRead):
- """The read-write Khashmir class, which can store and retrieve key/value mappings."""
-
- _Node = KNodeWrite
-
- #{ Local interface
- def storeValueForKey(self, key, value, callback=None):
- """Stores the value for the key in the global table.
-
- No status is returned in this implementation: peers respond, but they
- do not indicate whether the value was actually stored.
-
- @type key: C{string}
- @param key: the target key to store the value for
- @type value: C{string}
- @param value: the value to store with the key
- @type callback: C{method}
- @param callback: the method to call with the results, it must take 3
- parameters: the key, the value stored, and the result of the store
- (optional, defaults to doing nothing with the results)
- """
- def _storeValueForKey(nodes, key=key, value=value, response=callback, self=self):
- """Use the returned K closest nodes to store the key at."""
- if not response:
- def _storedValueHandler(key, value, sender):
- """Default callback that does nothing."""
- pass
- response = _storedValueHandler
- action = StoreValue(self, key, value, self.config['STORE_REDUNDANCY'], response, self.config)
- reactor.callLater(0, action.goWithNodes, nodes)
-
- # First find the K closest nodes to operate on.
- self.findNode(key, _storeValueForKey)
-
- #{ Remote interface
- def krpc_store_value(self, key, value, token, id, _krpc_sender):
- """Store the value locally with the key.
-
- @type key: C{string}
- @param key: the target key to store the value for
- @type value: C{string}
- @param value: the value to store with the key
- @type token: C{string}
- @param token: the token to confirm that this peer contacted us previously
- @type id: C{string}
- @param id: the node ID of the sender node
- @type _krpc_sender: (C{string}, C{int})
- @param _krpc_sender: the sender node's IP address and port
- """
- n = self.Node(id, _krpc_sender[0], _krpc_sender[1])
- self.insertNode(n, contacted = False)
- for secret in self.token_secrets:
- this_token = sha(secret + _krpc_sender[0]).digest()
- if token == this_token:
- self.store.storeValue(key, value)
- return {"id" : self.node.id}
- raise krpc.KrpcError, (krpc.KRPC_ERROR_INVALID_TOKEN, 'token is invalid, do a find_nodes to get a fresh one')
-
-
-class Khashmir(KhashmirWrite):
- """The default Khashmir class (currently the read-write L{KhashmirWrite})."""
- _Node = KNodeWrite
-
-
-class SimpleTests(unittest.TestCase):
-
- timeout = 10
- DHT_DEFAULTS = {'PORT': 9977, 'K': 8, 'HASH_LENGTH': 160,
- 'CHECKPOINT_INTERVAL': 300, 'CONCURRENT_REQS': 4,
- 'STORE_REDUNDANCY': 3, 'RETRIEVE_VALUES': -10000,
- 'MAX_FAILURES': 3,
- 'MIN_PING_INTERVAL': 900, 'BUCKET_STALENESS': 3600,
- 'KEY_EXPIRE': 3600, 'SPEW': False, }
-
- def setUp(self):
- d = self.DHT_DEFAULTS.copy()
- d['PORT'] = 4044
- self.a = Khashmir(d)
- d = self.DHT_DEFAULTS.copy()
- d['PORT'] = 4045
- self.b = Khashmir(d)
-
- def tearDown(self):
- self.a.shutdown()
- self.b.shutdown()
- os.unlink(self.a.store.db)
- os.unlink(self.b.store.db)
-
- def testAddContact(self):
- self.failUnlessEqual(len(self.a.table.buckets), 1)
- self.failUnlessEqual(len(self.a.table.buckets[0].l), 0)
-
- self.failUnlessEqual(len(self.b.table.buckets), 1)
- self.failUnlessEqual(len(self.b.table.buckets[0].l), 0)
-
- self.a.addContact('127.0.0.1', 4045)
- reactor.iterate()
- reactor.iterate()
- reactor.iterate()
- reactor.iterate()
-
- self.failUnlessEqual(len(self.a.table.buckets), 1)
- self.failUnlessEqual(len(self.a.table.buckets[0].l), 1)
- self.failUnlessEqual(len(self.b.table.buckets), 1)
- self.failUnlessEqual(len(self.b.table.buckets[0].l), 1)
-
- def testStoreRetrieve(self):
- self.a.addContact('127.0.0.1', 4045)
- reactor.iterate()
- reactor.iterate()
- reactor.iterate()
- reactor.iterate()
- self.got = 0
- self.a.storeValueForKey(sha('foo').digest(), 'foobar')
- reactor.iterate()
- reactor.iterate()
- reactor.iterate()
- reactor.iterate()
- reactor.iterate()
- reactor.iterate()
- self.a.valueForKey(sha('foo').digest(), self._cb)
- reactor.iterate()
- reactor.iterate()
- reactor.iterate()
- reactor.iterate()
- reactor.iterate()
- reactor.iterate()
- reactor.iterate()
-
- def _cb(self, key, val):
- if not val:
- self.failUnlessEqual(self.got, 1)
- elif 'foobar' in val:
- self.got = 1
-
-
-class MultiTest(unittest.TestCase):
-
- timeout = 30
- num = 20
- DHT_DEFAULTS = {'PORT': 9977, 'K': 8, 'HASH_LENGTH': 160,
- 'CHECKPOINT_INTERVAL': 300, 'CONCURRENT_REQS': 4,
- 'STORE_REDUNDANCY': 3, 'RETRIEVE_VALUES': -10000,
- 'MAX_FAILURES': 3,
- 'MIN_PING_INTERVAL': 900, 'BUCKET_STALENESS': 3600,
- 'KEY_EXPIRE': 3600, 'SPEW': False, }
-
- def _done(self, val):
- self.done = 1
-
- def setUp(self):
- self.l = []
- self.startport = 4088
- for i in range(self.num):
- d = self.DHT_DEFAULTS.copy()
- d['PORT'] = self.startport + i
- self.l.append(Khashmir(d))
- reactor.iterate()
- reactor.iterate()
-
- for i in self.l:
- i.addContact('127.0.0.1', self.l[randrange(0,self.num)].port)
- i.addContact('127.0.0.1', self.l[randrange(0,self.num)].port)
- i.addContact('127.0.0.1', self.l[randrange(0,self.num)].port)
- reactor.iterate()
- reactor.iterate()
- reactor.iterate()
-
- for i in self.l:
- self.done = 0
- i.findCloseNodes(self._done)
- while not self.done:
- reactor.iterate()
- for i in self.l:
- self.done = 0
- i.findCloseNodes(self._done)
- while not self.done:
- reactor.iterate()
-
- def tearDown(self):
- for i in self.l:
- i.shutdown()
- os.unlink(i.store.db)
-
- reactor.iterate()
-
- def testStoreRetrieve(self):
- for i in range(10):
- K = newID()
- V = newID()
-
- for a in range(3):
- self.done = 0
- def _scb(key, value, result):
- self.done = 1
- self.l[randrange(0, self.num)].storeValueForKey(K, V, _scb)
- while not self.done:
- reactor.iterate()
-
-
- def _rcb(key, val):
- if not val:
- self.done = 1
- self.failUnlessEqual(self.got, 1)
- elif V in val:
- self.got = 1
- for x in range(3):
- self.got = 0
- self.done = 0
- self.l[randrange(0, self.num)].valueForKey(K, _rcb)
- while not self.done:
- reactor.iterate()
+++ /dev/null
-## Copyright 2002-2004 Andrew Loewenstern, All Rights Reserved
-# see LICENSE.txt for license information
-
-"""Represents a khashmir node in the DHT."""
-
-from twisted.python import log
-
-from node import Node, NULL_ID
-
-class KNodeBase(Node):
- """A basic node that can only be pinged and help find other nodes."""
-
- def checkSender(self, dict):
- """Check the sender's info to make sure it meets expectations."""
- try:
- senderid = dict['rsp']['id']
- except KeyError:
- log.msg("No peer id in response")
- raise Exception, "No peer id in response."
- else:
- if self.id != NULL_ID and senderid != self.id:
- log.msg("Got response from different node than expected.")
- self.table.invalidateNode(self)
-
- return dict
-
- def errBack(self, err):
- """Log an error that has occurred."""
- log.err(err)
- return err
-
- def ping(self, id):
- """Ping the node."""
- df = self.conn.sendRequest('ping', {"id":id})
- df.addErrback(self.errBack)
- df.addCallback(self.checkSender)
- return df
-
- def join(self, id):
- """Use the node to bootstrap into the system."""
- df = self.conn.sendRequest('join', {"id":id})
- df.addErrback(self.errBack)
- df.addCallback(self.checkSender)
- return df
-
- def findNode(self, id, target):
- """Request the nearest nodes to the target that the node knows about."""
- df = self.conn.sendRequest('find_node', {"target" : target, "id": id})
- df.addErrback(self.errBack)
- df.addCallback(self.checkSender)
- return df
-
-class KNodeRead(KNodeBase):
- """More advanced node that can also find and send values."""
-
- def findValue(self, id, key):
- """Request the nearest nodes to the key that the node knows about."""
- df = self.conn.sendRequest('find_value', {"key" : key, "id" : id})
- df.addErrback(self.errBack)
- df.addCallback(self.checkSender)
- return df
-
- def getValue(self, id, key, num):
- """Request the values that the node has for the key."""
- df = self.conn.sendRequest('get_value', {"key" : key, "num": num, "id" : id})
- df.addErrback(self.errBack)
- df.addCallback(self.checkSender)
- return df
-
-class KNodeWrite(KNodeRead):
- """Most advanced node that can also store values."""
-
- def storeValue(self, id, key, value, token):
- """Store a value in the node."""
- df = self.conn.sendRequest('store_value', {"key" : key, "value" : value, "token" : token, "id": id})
- df.addErrback(self.errBack)
- df.addCallback(self.checkSender)
- return df
+++ /dev/null
-## Copyright 2002-2003 Andrew Loewenstern, All Rights Reserved
-# see LICENSE.txt for license information
-
-"""The KRPC communication protocol implementation.
-
-@var KRPC_TIMEOUT: the number of seconds after which requests time out
-@var UDP_PACKET_LIMIT: the maximum number of bytes that can be sent in a
- UDP packet without fragmentation
-
-@var KRPC_ERROR: the code for a generic error
-@var KRPC_ERROR_SERVER_ERROR: the code for a server error
-@var KRPC_ERROR_MALFORMED_PACKET: the code for a malformed packet error
-@var KRPC_ERROR_METHOD_UNKNOWN: the code for a method unknown error
-@var KRPC_ERROR_MALFORMED_REQUEST: the code for a malformed request error
-@var KRPC_ERROR_INVALID_TOKEN: the code for an invalid token error
-@var KRPC_ERROR_RESPONSE_TOO_LONG: the code for a response too long error
-
-@var KRPC_ERROR_INTERNAL: the code for an internal error
-@var KRPC_ERROR_RECEIVED_UNKNOWN: the code for an unknown message type error
-@var KRPC_ERROR_TIMEOUT: the code for a timeout error
-@var KRPC_ERROR_PROTOCOL_STOPPED: the code for a stopped protocol error
-
-@var TID: the identifier for the transaction ID
-@var REQ: the identifier for a request packet
-@var RSP: the identifier for a response packet
-@var TYP: the identifier for the type of packet
-@var ARG: the identifier for the argument to the request
-@var ERR: the identifier for an error packet
-
-@group Remote node error codes: KRPC_ERROR, KRPC_ERROR_SERVER_ERROR,
- KRPC_ERROR_MALFORMED_PACKET, KRPC_ERROR_METHOD_UNKNOWN,
- KRPC_ERROR_MALFORMED_REQUEST, KRPC_ERROR_INVALID_TOKEN,
- KRPC_ERROR_RESPONSE_TOO_LONG
-@group Local node error codes: KRPC_ERROR_INTERNAL, KRPC_ERROR_RECEIVED_UNKNOWN,
- KRPC_ERROR_TIMEOUT, KRPC_ERROR_PROTOCOL_STOPPED
-@group Command identifiers: TID, REQ, RSP, TYP, ARG, ERR
-
-"""
-
-from bencode import bencode, bdecode
-from time import asctime
-from math import ceil
-
-from twisted.internet.defer import Deferred
-from twisted.internet import protocol, reactor
-from twisted.python import log
-from twisted.trial import unittest
-
-from khash import newID
-
-KRPC_TIMEOUT = 20
-UDP_PACKET_LIMIT = 1472
-
-# Remote node errors
-KRPC_ERROR = 200
-KRPC_ERROR_SERVER_ERROR = 201
-KRPC_ERROR_MALFORMED_PACKET = 202
-KRPC_ERROR_METHOD_UNKNOWN = 203
-KRPC_ERROR_MALFORMED_REQUEST = 204
-KRPC_ERROR_INVALID_TOKEN = 205
-KRPC_ERROR_RESPONSE_TOO_LONG = 206
-
-# Local errors
-KRPC_ERROR_INTERNAL = 100
-KRPC_ERROR_RECEIVED_UNKNOWN = 101
-KRPC_ERROR_TIMEOUT = 102
-KRPC_ERROR_PROTOCOL_STOPPED = 103
-
-# commands
-TID = 't'
-REQ = 'q'
-RSP = 'r'
-TYP = 'y'
-ARG = 'a'
-ERR = 'e'
-
-class KrpcError(Exception):
- """An error occurred in the KRPC protocol."""
- pass
-
-def verifyMessage(msg):
- """Check received message for corruption and errors.
-
- @type msg: C{dictionary}
- @param msg: the dictionary of information received on the connection
- @raise KrpcError: if the message is corrupt
- """
-
- if type(msg) != dict:
- raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "not a dictionary")
- if TYP not in msg:
- raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "no message type")
- if msg[TYP] == REQ:
- if REQ not in msg:
- raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "request type not specified")
- if type(msg[REQ]) != str:
- raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "request type is not a string")
- if ARG not in msg:
- raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "no arguments for request")
- if type(msg[ARG]) != dict:
- raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "arguments for request are not in a dictionary")
- elif msg[TYP] == RSP:
- if RSP not in msg:
- raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "response not specified")
- if type(msg[RSP]) != dict:
- raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "response is not a dictionary")
- elif msg[TYP] == ERR:
- if ERR not in msg:
- raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "error not specified")
- if type(msg[ERR]) != list:
- raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "error is not a list")
- if len(msg[ERR]) != 2:
- raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "error is not a 2-element list")
- if type(msg[ERR][0]) not in (int, long):
- raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "error number is not a number")
- if type(msg[ERR][1]) != str:
- raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "error string is not a string")
-# else:
-# raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "unknown message type")
- if TID not in msg:
- raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "no transaction ID specified")
- if type(msg[TID]) != str:
- raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "transaction id is not a string")
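-
-# Doctest-style sketch (not part of the original module) of what
-# verifyMessage accepts and rejects, using the constants defined above:
-#
-# >>> good = {TID: 't0', TYP: REQ, REQ: 'ping', ARG: {'id': 'a'*20}}
-# >>> verifyMessage(good)        # passes silently
-# >>> verifyMessage({TID: 't0', TYP: REQ})
-# Traceback (most recent call last):
-#     ...
-# KrpcError: (202, 'request type not specified')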
-
-class hostbroker(protocol.DatagramProtocol):
- """The factory for the KRPC protocol.
-
- @type server: L{khashmir.Khashmir}
- @ivar server: the main Khashmir program
- @type config: C{dictionary}
- @ivar config: the configuration parameters for the DHT
- @type connections: C{dictionary}
- @ivar connections: all the connections that have ever been made to the
- protocol, keys are IP address and port pairs, values are L{KRPC}
- protocols for the addresses
- @ivar protocol: the protocol to use to handle incoming connections
- (added externally)
- @type addr: (C{string}, C{int})
- @ivar addr: the IP address and port of this node
- """
-
- def __init__(self, server, config):
- """Initialize the factory.
-
- @type server: L{khashmir.Khashmir}
- @param server: the main DHT program
- @type config: C{dictionary}
- @param config: the configuration parameters for the DHT
- """
- self.server = server
- self.config = config
- # this should be changed to storage that drops old entries
- self.connections = {}
-
- def datagramReceived(self, datagram, addr):
- """Optionally create a new protocol object, and handle the new datagram.
-
- @type datagram: C{string}
- @param datagram: the data received from the transport.
- @type addr: (C{string}, C{int})
- @param addr: source IP address and port of datagram.
- """
- c = self.connectionForAddr(addr)
- c.datagramReceived(datagram, addr)
- #if c.idle():
- # del self.connections[addr]
-
- def connectionForAddr(self, addr):
- """Get a protocol object for the source.
-
- @type addr: (C{string}, C{int})
- @param addr: source IP address and port of datagram.
- """
- # Don't connect to ourself
- if addr == self.addr:
-            raise KrpcError
-
- # Create a new protocol object if necessary
- if not self.connections.has_key(addr):
- conn = self.protocol(addr, self.server, self.transport, self.config['SPEW'])
- self.connections[addr] = conn
- else:
- conn = self.connections[addr]
- return conn
-
- def makeConnection(self, transport):
- """Make a connection to a transport and save our address."""
- protocol.DatagramProtocol.makeConnection(self, transport)
- tup = transport.getHost()
- self.addr = (tup.host, tup.port)
-
- def stopProtocol(self):
- """Stop all the open connections."""
- for conn in self.connections.values():
- conn.stop()
- protocol.DatagramProtocol.stopProtocol(self)
-
-class KRPC:
- """The KRPC protocol implementation.
-
- @ivar transport: the transport to use for the protocol
- @type factory: L{khashmir.Khashmir}
- @ivar factory: the main Khashmir program
- @type addr: (C{string}, C{int})
- @ivar addr: the IP address and port of the source node
- @type noisy: C{boolean}
- @ivar noisy: whether to log additional details of the protocol
- @type tids: C{dictionary}
- @ivar tids: the transaction IDs outstanding for requests, keys are the
- transaction ID of the request, values are the deferreds to call with
- the results
- @type stopped: C{boolean}
- @ivar stopped: whether the protocol has been stopped
- """
-
- def __init__(self, addr, server, transport, spew = False):
- """Initialize the protocol.
-
- @type addr: (C{string}, C{int})
- @param addr: the IP address and port of the source node
- @type server: L{khashmir.Khashmir}
- @param server: the main Khashmir program
- @param transport: the transport to use for the protocol
- @type spew: C{boolean}
- @param spew: whether to log additional details of the protocol
- (optional, defaults to False)
- """
- self.transport = transport
- self.factory = server
- self.addr = addr
- self.noisy = spew
- self.tids = {}
- self.stopped = False
-
- def datagramReceived(self, data, addr):
- """Process the new datagram.
-
- @type data: C{string}
- @param data: the data received from the transport.
- @type addr: (C{string}, C{int})
- @param addr: source IP address and port of datagram.
- """
-        if self.stopped:
-            if self.noisy:
-                log.msg("stopped, dropping message from %r: %s" % (addr, data))
-            return
-
- # Bdecode the message
- try:
- msg = bdecode(data)
- except Exception, e:
- if self.noisy:
- log.msg("krpc bdecode error: ")
- log.err(e)
- return
-
- # Make sure the remote node isn't trying anything funny
- try:
- verifyMessage(msg)
- except Exception, e:
- log.msg("krpc message verification error: ")
- log.err(e)
- return
-
- if self.noisy:
- log.msg("%d received from %r: %s" % (self.factory.port, addr, msg))
-
- # Process it based on its type
- if msg[TYP] == REQ:
- ilen = len(data)
-
- # Requests are handled by the factory
-            f = getattr(self.factory, "krpc_" + msg[REQ], None)
- msg[ARG]['_krpc_sender'] = self.addr
- if f and callable(f):
- try:
- ret = f(*(), **msg[ARG])
- except KrpcError, e:
- log.msg('Got a Krpc error while running: krpc_%s' % msg[REQ])
- log.err(e)
- olen = self._sendResponse(addr, msg[TID], ERR, [e[0], e[1]])
- except TypeError, e:
- log.msg('Got a malformed request for: krpc_%s' % msg[REQ])
- log.err(e)
- olen = self._sendResponse(addr, msg[TID], ERR,
- [KRPC_ERROR_MALFORMED_REQUEST, str(e)])
- except Exception, e:
- log.msg('Got an unknown error while running: krpc_%s' % msg[REQ])
- log.err(e)
- olen = self._sendResponse(addr, msg[TID], ERR,
- [KRPC_ERROR_SERVER_ERROR, str(e)])
- else:
- olen = self._sendResponse(addr, msg[TID], RSP, ret)
- else:
- # Request for unknown method
- log.msg("ERROR: don't know about method %s" % msg[REQ])
- olen = self._sendResponse(addr, msg[TID], ERR,
- [KRPC_ERROR_METHOD_UNKNOWN, "unknown method "+str(msg[REQ])])
- if self.noisy:
- log.msg("%s >>> %s - %s %s %s" % (addr, self.factory.node.port,
- ilen, msg[REQ], olen))
- elif msg[TYP] == RSP:
- # Responses get processed by their TID's deferred
- if self.tids.has_key(msg[TID]):
- df = self.tids[msg[TID]]
- # callback
- del(self.tids[msg[TID]])
- df.callback({'rsp' : msg[RSP], '_krpc_sender': addr})
- else:
- # no tid, this transaction timed out already...
- if self.noisy:
- log.msg('timeout: %r' % msg[RSP]['id'])
- elif msg[TYP] == ERR:
- # Errors get processed by their TID's deferred's errback
- if self.tids.has_key(msg[TID]):
- df = self.tids[msg[TID]]
- del(self.tids[msg[TID]])
- # callback
- df.errback(KrpcError(*msg[ERR]))
- else:
- # day late and dollar short, just log it
- log.msg("Got an error for an unknown request: %r" % (msg[ERR], ))
- else:
- # Received an unknown message type
- if self.noisy:
- log.msg("unknown message type: %r" % msg)
- if msg[TID] in self.tids:
- df = self.tids[msg[TID]]
- del(self.tids[msg[TID]])
- # callback
- df.errback(KrpcError(KRPC_ERROR_RECEIVED_UNKNOWN,
- "Received an unknown message type: %r" % msg[TYP]))
-
- def _sendResponse(self, addr, tid, msgType, response):
- """Helper function for sending responses to nodes.
-
- @type addr: (C{string}, C{int})
- @param addr: source IP address and port of datagram.
- @param tid: the transaction ID of the request
- @param msgType: the type of message to respond with
- @param response: the arguments for the response
- """
- if not response:
- response = {}
-
- try:
- # Create the response message
- msg = {TID : tid, TYP : msgType, msgType : response}
-
- if self.noisy:
- log.msg("%d responding to %r: %s" % (self.factory.port, addr, msg))
-
- out = bencode(msg)
-
- # Make sure its not too long
- if len(out) > UDP_PACKET_LIMIT:
- # Can we remove some values to shorten it?
- if 'values' in response:
- # Save the original list of values
- orig_values = response['values']
- len_orig_values = len(bencode(orig_values))
-
-                    # Calculate the maximum value length possible
- max_len_values = len_orig_values - (len(out) - UDP_PACKET_LIMIT)
- assert max_len_values > 0
-
- # Start with a calculation of how many values should be included
- # (assumes all values are the same length)
- per_value = (float(len_orig_values) - 2.0) / float(len(orig_values))
- num_values = len(orig_values) - int(ceil(float(len(out) - UDP_PACKET_LIMIT) / per_value))
-
- # Do a linear search for the actual maximum number possible
- bencoded_values = len(bencode(orig_values[:num_values]))
- while bencoded_values < max_len_values and num_values + 1 < len(orig_values):
- bencoded_values += len(bencode(orig_values[num_values]))
- num_values += 1
- while bencoded_values > max_len_values and num_values > 0:
- num_values -= 1
- bencoded_values -= len(bencode(orig_values[num_values]))
- assert num_values > 0
-
- # Encode the result
- response['values'] = orig_values[:num_values]
- out = bencode(msg)
- assert len(out) < UDP_PACKET_LIMIT
- log.msg('Shortened a long packet from %d to %d values, new packet length: %d' %
- (len(orig_values), num_values, len(out)))
- else:
- # Too long a response, send an error
- log.msg('Could not send response, too long: %d bytes' % len(out))
- msg = {TID : tid, TYP : ERR, ERR : [KRPC_ERROR_RESPONSE_TOO_LONG, "response was %d bytes" % len(out)]}
- out = bencode(msg)
-
- except Exception, e:
- # Unknown error, send an error message
- msg = {TID : tid, TYP : ERR, ERR : [KRPC_ERROR_SERVER_ERROR, "unknown error sending response: %s" % str(e)]}
- out = bencode(msg)
-
- self.transport.write(out, addr)
- return len(out)
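-
-    # Worked example of the shortening above (illustrative, numbers assumed):
-    # if the full message bencodes to len(out) = 2000 bytes with
-    # UDP_PACKET_LIMIT = 1472, the packet is 528 bytes too long. With 100
-    # values whose bencoded list is 1800 bytes, per_value is
-    # (1800 - 2.0) / 100 = 17.98, so the first guess keeps
-    # 100 - ceil(528 / 17.98) = 70 values; the linear search then nudges
-    # num_values until the trimmed list just fits under the limit.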
-
- def sendRequest(self, method, args):
- """Send a request to the remote node.
-
- @type method: C{string}
-        @param method: the method name to call on the remote node
- @param args: the arguments to send to the remote node's method
- """
- if self.stopped:
- raise KrpcError, (KRPC_ERROR_PROTOCOL_STOPPED, "cannot send, connection has been stopped")
-
- # Create the request message
- msg = {TID : newID(), TYP : REQ, REQ : method, ARG : args}
- if self.noisy:
- log.msg("%d sending to %r: %s" % (self.factory.port, self.addr, msg))
- data = bencode(msg)
-
- # Create the deferred and save it with the TID
- d = Deferred()
- self.tids[msg[TID]] = d
-
- # Schedule a later timeout call
- def timeOut(tids = self.tids, id = msg[TID], method = method, addr = self.addr):
- """Call the deferred's errback if a timeout occurs."""
- if tids.has_key(id):
- df = tids[id]
- del(tids[id])
- df.errback(KrpcError(KRPC_ERROR_TIMEOUT, "timeout waiting for '%s' from %r" % (method, addr)))
- later = reactor.callLater(KRPC_TIMEOUT, timeOut)
-
- # Cancel the timeout call if a response is received
- def dropTimeOut(dict, later_call = later):
- """Cancel the timeout call when a response is received."""
- if later_call.active():
- later_call.cancel()
- return dict
- d.addBoth(dropTimeOut)
-
- self.transport.write(data, self.addr)
- return d
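-
-    # Typical use (sketch, not in the original file; proto and my_id are
-    # assumed names): the Deferred fires with the decoded response
-    # dictionary, or errbacks with a KrpcError on timeout or remote error.
-    #
-    # >>> df = proto.sendRequest('ping', {'id': my_id})
-    # >>> df.addCallback(lambda d: d['rsp']['id'])
-    # >>> df.addErrback(log.err)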
-
- def stop(self):
- """Timeout all pending requests."""
- for df in self.tids.values():
- df.errback(KrpcError(KRPC_ERROR_PROTOCOL_STOPPED, 'connection has been stopped while waiting for response'))
- self.tids = {}
- self.stopped = True
-
-#{ For testing the KRPC protocol
-def connectionForAddr(host, port):
- return host
-
-class Receiver(protocol.Factory):
- protocol = KRPC
- def __init__(self):
- self.buf = []
- def krpc_store(self, msg, _krpc_sender):
- self.buf += [msg]
- return {}
- def krpc_echo(self, msg, _krpc_sender):
- return {'msg': msg}
- def krpc_values(self, length, num, _krpc_sender):
- return {'values': ['1'*length]*num}
-
-def make(port):
- af = Receiver()
- a = hostbroker(af, {'SPEW': False})
- a.protocol = KRPC
- p = reactor.listenUDP(port, a)
- return af, a, p
-
-class KRPCTests(unittest.TestCase):
- timeout = 2
-
- def setUp(self):
- self.af, self.a, self.ap = make(1180)
- self.bf, self.b, self.bp = make(1181)
-
- def tearDown(self):
- self.ap.stopListening()
- self.bp.stopListening()
-
- def bufEquals(self, result, value):
- self.failUnlessEqual(self.bf.buf, value)
-
- def testSimpleMessage(self):
- d = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('store', {'msg' : "This is a test."})
- d.addCallback(self.bufEquals, ["This is a test."])
- return d
-
- def testMessageBlast(self):
- for i in range(100):
- d = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('store', {'msg' : "This is a test."})
- d.addCallback(self.bufEquals, ["This is a test."] * 100)
- return d
-
- def testEcho(self):
- df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is a test."})
- df.addCallback(self.gotMsg, "This is a test.")
- return df
-
- def gotMsg(self, dict, should_be):
- _krpc_sender = dict['_krpc_sender']
- msg = dict['rsp']
- self.failUnlessEqual(msg['msg'], should_be)
-
- def testManyEcho(self):
- for i in xrange(100):
- df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is a test."})
- df.addCallback(self.gotMsg, "This is a test.")
- return df
-
- def testMultiEcho(self):
- df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is a test."})
- df.addCallback(self.gotMsg, "This is a test.")
-
- df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is another test."})
- df.addCallback(self.gotMsg, "This is another test.")
-
- df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is yet another test."})
- df.addCallback(self.gotMsg, "This is yet another test.")
-
- return df
-
- def testEchoReset(self):
- df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is a test."})
- df.addCallback(self.gotMsg, "This is a test.")
-
- df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is another test."})
- df.addCallback(self.gotMsg, "This is another test.")
- df.addCallback(self.echoReset)
- return df
-
- def echoReset(self, dict):
- del(self.a.connections[('127.0.0.1', 1181)])
- df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is yet another test."})
- df.addCallback(self.gotMsg, "This is yet another test.")
- return df
-
- def testUnknownMeth(self):
- df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('blahblah', {'msg' : "This is a test."})
- df.addBoth(self.gotErr, KRPC_ERROR_METHOD_UNKNOWN)
- return df
-
- def testMalformedRequest(self):
- df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is a test.", 'foo': 'bar'})
- df.addBoth(self.gotErr, KRPC_ERROR_MALFORMED_REQUEST)
- return df
-
- def gotErr(self, err, should_be):
- self.failUnlessEqual(err.value[0], should_be)
-
- def testLongPackets(self):
- df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('values', {'length' : 1, 'num': 2000})
- df.addCallback(self.gotLongRsp)
- return df
-
- def gotLongRsp(self, dict):
- # Not quite accurate, but good enough
- self.failUnless(len(bencode(dict))-10 < UDP_PACKET_LIMIT)
-
\ No newline at end of file
+++ /dev/null
-## Copyright 2002-2003 Andrew Loewenstern, All Rights Reserved
-# see LICENSE.txt for license information
-
-"""The routing table and buckets for a kademlia-like DHT."""
-
-from datetime import datetime
-from bisect import bisect_left
-
-from twisted.python import log
-from twisted.trial import unittest
-
-import khash
-from node import Node, NULL_ID
-
-class KTable:
- """Local routing table for a kademlia-like distributed hash table.
-
- @type node: L{node.Node}
- @ivar node: the local node
- @type config: C{dictionary}
- @ivar config: the configuration parameters for the DHT
- @type buckets: C{list} of L{KBucket}
- @ivar buckets: the buckets of nodes in the routing table
- """
-
- def __init__(self, node, config):
-        """Initialize the routing table with one empty bucket covering the whole ID space.
-
- @type node: L{node.Node}
- @param node: the local node
- @type config: C{dictionary}
- @param config: the configuration parameters for the DHT
- """
- # this is the root node, a.k.a. US!
- assert node.id != NULL_ID
- self.node = node
- self.config = config
- self.buckets = [KBucket([], 0L, 2L**self.config['HASH_LENGTH'])]
-
- def _bucketIndexForInt(self, num):
- """Find the index of the bucket that should hold the node's ID number."""
- return bisect_left(self.buckets, num)
-
- def findNodes(self, id):
- """Find the K nodes in our own local table closest to the ID.
-
- @type id: C{string} of C{int} or L{node.Node}
- @param id: the ID to find nodes that are close to
- @raise TypeError: if id does not properly identify an ID
- """
-
- # Get the ID number from the input
- if isinstance(id, str):
- num = khash.intify(id)
- elif isinstance(id, Node):
- num = id.num
- elif isinstance(id, int) or isinstance(id, long):
- num = id
- else:
- raise TypeError, "findNodes requires an int, string, or Node"
-
- nodes = []
- i = self._bucketIndexForInt(num)
-
- # If this node is already in our table then return it
- try:
- index = self.buckets[i].l.index(num)
- except ValueError:
- pass
- else:
- return [self.buckets[i].l[index]]
-
- # Don't have the node, get the K closest nodes from the appropriate bucket
- nodes = nodes + self.buckets[i].l
-
- # Make sure we have enough
- if len(nodes) < self.config['K']:
- # Look in adjoining buckets for nodes
- min = i - 1
- max = i + 1
- while len(nodes) < self.config['K'] and (min >= 0 or max < len(self.buckets)):
- # Add the adjoining buckets' nodes to the list
- if min >= 0:
- nodes = nodes + self.buckets[min].l
- if max < len(self.buckets):
- nodes = nodes + self.buckets[max].l
- min = min - 1
- max = max + 1
-
- # Sort the found nodes by proximity to the id and return the closest K
- nodes.sort(lambda a, b, num=num: cmp(num ^ a.num, num ^ b.num))
- return nodes[:self.config['K']]
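-
-    # The sort above is Kademlia's XOR metric (illustrative example): for a
-    # target num = 0b1010, a node with num 0b1000 lies at distance
-    # 0b1010 ^ 0b1000 = 2, while one with num 0b0111 lies at distance
-    # 0b1010 ^ 0b0111 = 13, so the first sorts closer.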
-
- def _splitBucket(self, a):
- """Split a bucket in two.
-
- @type a: L{KBucket}
- @param a: the bucket to split
- """
- # Create a new bucket with half the (upper) range of the current bucket
- diff = (a.max - a.min) / 2
- b = KBucket([], a.max - diff, a.max)
- self.buckets.insert(self.buckets.index(a.min) + 1, b)
-
- # Reduce the input bucket's (upper) range
- a.max = a.max - diff
-
- # Transfer nodes to the new bucket
- for anode in a.l[:]:
- if anode.num >= a.max:
- a.l.remove(anode)
- b.l.append(anode)
-
- def replaceStaleNode(self, stale, new = None):
- """Replace a stale node in a bucket with a new one.
-
- This is used by clients to replace a node returned by insertNode after
- it fails to respond to a ping.
-
- @type stale: L{node.Node}
- @param stale: the stale node to remove from the bucket
- @type new: L{node.Node}
-        @param new: the new node to add in its place (optional, defaults to
- not adding any node in the old node's place)
- """
- # Find the stale node's bucket
- i = self._bucketIndexForInt(stale.num)
- try:
- it = self.buckets[i].l.index(stale.num)
- except ValueError:
- return
-
- # Remove the stale node and insert the new one
- del(self.buckets[i].l[it])
- if new:
- self.buckets[i].l.append(new)
-
- def insertNode(self, node, contacted = True):
- """Try to insert a node in the routing table.
-
- This inserts the node, returning None if successful, otherwise returns
- the oldest node in the bucket if it's full. The caller is then
- responsible for pinging the returned node and calling replaceStaleNode
-        if it doesn't respond. contacted means that we contacted them, so the
-        node is known to be reachable.
-
- @type node: L{node.Node}
- @param node: the new node to try and insert
- @type contacted: C{boolean}
- @param contacted: whether the new node is known to be good, i.e.
- responded to a request (optional, defaults to True)
- @rtype: L{node.Node}
- @return: None if successful (the bucket wasn't full), otherwise returns the oldest node in the bucket
- """
- assert node.id != NULL_ID
- if node.id == self.node.id: return
-
- # Get the bucket for this node
-        i = self._bucketIndexForInt(node.num)
-
- # Check to see if node is in the bucket already
- try:
- it = self.buckets[i].l.index(node.num)
- except ValueError:
- pass
- else:
- # The node is already in the bucket
- if contacted:
- # It responded, so update it
- node.updateLastSeen()
- # move node to end of bucket
- xnode = self.buckets[i].l[it]
- del(self.buckets[i].l[it])
- # note that we removed the original and replaced it with the new one
- # utilizing this nodes new contact info
- self.buckets[i].l.append(xnode)
- self.buckets[i].touch()
- return
-
- # We don't have this node, check to see if the bucket is full
- if len(self.buckets[i].l) < self.config['K']:
- # Not full, append this node and return
- if contacted:
- node.updateLastSeen()
- self.buckets[i].l.append(node)
- self.buckets[i].touch()
- return
-
- # Bucket is full, check to see if the local node is not in the bucket
- if not (self.buckets[i].min <= self.node < self.buckets[i].max):
- # Local node not in the bucket, can't split it, return the oldest node
- return self.buckets[i].l[0]
-
- # Make sure our table isn't FULL, this is really unlikely
- if len(self.buckets) >= self.config['HASH_LENGTH']:
- log.err("Hash Table is FULL! Increase K!")
- return
-
- # This bucket is full and contains our node, split the bucket
- self._splitBucket(self.buckets[i])
-
- # Now that the bucket is split and balanced, try to insert the node again
- return self.insertNode(node)
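-
-    # Sketch of the caller protocol when the bucket is full (table and
-    # new_node are assumed names):
-    #
-    # >>> old = table.insertNode(new_node)
-    # >>> if old is not None:
-    # ...     df = old.ping(table.node.id)
-    # ...     df.addErrback(lambda err: table.replaceStaleNode(old, new_node))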
-
- def justSeenNode(self, id):
- """Mark a node as just having been seen.
-
- Call this any time you get a message from a node, it will update it
- in the table if it's there.
-
- @type id: C{string} of C{int} or L{node.Node}
- @param id: the node ID to mark as just having been seen
- @rtype: C{datetime.datetime}
- @return: the old lastSeen time of the node, or None if it's not in the table
- """
- try:
- n = self.findNodes(id)[0]
- except IndexError:
- return None
- else:
- tstamp = n.lastSeen
- n.updateLastSeen()
- return tstamp
-
- def invalidateNode(self, n):
- """Remove the node from the routing table.
-
- Forget about node n. Use this when you know that a node is invalid.
- """
- self.replaceStaleNode(n)
-
- def nodeFailed(self, node):
- """Mark a node as having failed once, and remove it if it has failed too much."""
- try:
- n = self.findNodes(node.num)[0]
- except IndexError:
- return None
- else:
- if n.msgFailed() >= self.config['MAX_FAILURES']:
- self.invalidateNode(n)
-
-class KBucket:
- """Single bucket of nodes in a kademlia-like routing table.
-
- @type l: C{list} of L{node.Node}
- @ivar l: the nodes that are in this bucket
- @type min: C{long}
- @ivar min: the minimum node ID that can be in this bucket
- @type max: C{long}
- @ivar max: the maximum node ID that can be in this bucket
- @type lastAccessed: C{datetime.datetime}
- @ivar lastAccessed: the last time a node in this bucket was successfully contacted
- """
-
- def __init__(self, contents, min, max):
- """Initialize the bucket with nodes.
-
- @type contents: C{list} of L{node.Node}
- @param contents: the nodes to store in the bucket
- @type min: C{long}
- @param min: the minimum node ID that can be in this bucket
- @type max: C{long}
- @param max: the maximum node ID that can be in this bucket
- """
- self.l = contents
- self.min = min
- self.max = max
- self.lastAccessed = datetime.now()
-
- def touch(self):
- """Update the L{lastAccessed} time."""
- self.lastAccessed = datetime.now()
-
- def getNodeWithInt(self, num):
- """Get the node in the bucket with that number.
-
- @type num: C{long}
- @param num: the node ID to look for
- @raise ValueError: if the node ID is not in the bucket
- @rtype: L{node.Node}
- @return: the node
- """
-        if num in self.l:
-            return self.l[self.l.index(num)]
-        raise ValueError
-
- def __repr__(self):
- return "<KBucket %d items (%d to %d)>" % (len(self.l), self.min, self.max)
-
- #{ Comparators to bisect/index a list of buckets (by their range) with either a node or a long
- def __lt__(self, a):
- if isinstance(a, Node): a = a.num
- return self.max <= a
- def __le__(self, a):
- if isinstance(a, Node): a = a.num
- return self.min < a
- def __gt__(self, a):
- if isinstance(a, Node): a = a.num
- return self.min > a
- def __ge__(self, a):
- if isinstance(a, Node): a = a.num
- return self.max >= a
- def __eq__(self, a):
- if isinstance(a, Node): a = a.num
- return self.min <= a and self.max > a
- def __ne__(self, a):
- if isinstance(a, Node): a = a.num
- return self.min >= a or self.max < a
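-
-# Illustrative sketch of how these comparators let bisect locate the covering
-# bucket (a 4-bit ID space is assumed for brevity):
-#
-# >>> buckets = [KBucket([], 0L, 8L), KBucket([], 8L, 16L)]
-# >>> bisect_left(buckets, 11L)    # 11 falls in [8, 16)
-# 1
-# >>> buckets[1] == 11L            # __eq__ tests range membership
-# True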
-
-class TestKTable(unittest.TestCase):
- """Unit tests for the routing table."""
-
- def setUp(self):
- self.a = Node(khash.newID(), '127.0.0.1', 2002)
- self.t = KTable(self.a, {'HASH_LENGTH': 160, 'K': 8, 'MAX_FAILURES': 3})
-
- def testAddNode(self):
- self.b = Node(khash.newID(), '127.0.0.1', 2003)
- self.t.insertNode(self.b)
- self.failUnlessEqual(len(self.t.buckets[0].l), 1)
- self.failUnlessEqual(self.t.buckets[0].l[0], self.b)
-
- def testRemove(self):
- self.testAddNode()
- self.t.invalidateNode(self.b)
- self.failUnlessEqual(len(self.t.buckets[0].l), 0)
-
- def testFail(self):
- self.testAddNode()
- for i in range(self.t.config['MAX_FAILURES'] - 1):
- self.t.nodeFailed(self.b)
- self.failUnlessEqual(len(self.t.buckets[0].l), 1)
- self.failUnlessEqual(self.t.buckets[0].l[0], self.b)
-
- self.t.nodeFailed(self.b)
- self.failUnlessEqual(len(self.t.buckets[0].l), 0)
+++ /dev/null
-## Copyright 2002-2003 Andrew Loewenstern, All Rights Reserved
-# see LICENSE.txt for license information
-
-"""Represents a node in the DHT.
-
-@type NULL_ID: C{string}
-@var NULL_ID: the node ID to use until one is known
-"""
-
-from datetime import datetime, MINYEAR
-from types import InstanceType
-
-from twisted.trial import unittest
-
-import khash
-from util import compact
-
-# magic id to use before we know a peer's id
-NULL_ID = 20 * '\0'
-
-class Node:
- """Encapsulate a node's contact info.
-
- @ivar conn: the connection to the remote node (added externally)
- @ivar table: the routing table (added externally)
- @type fails: C{int}
- @ivar fails: number of times this node has failed in a row
- @type lastSeen: C{datetime.datetime}
- @ivar lastSeen: the last time a response was received from this node
- @type id: C{string}
- @ivar id: the node's ID in the DHT
- @type num: C{long}
- @ivar num: the node's ID in number form
- @type host: C{string}
- @ivar host: the IP address of the node
- @type port: C{int}
- @ivar port: the port of the node
- @type token: C{string}
- @ivar token: the last received token from the node
- @type num_values: C{int}
- @ivar num_values: the number of values the node has for the key in the
- currently executing action
- """
-
- def __init__(self, id, host = None, port = None):
- """Initialize the node.
-
- @type id: C{string} or C{dictionary}
- @param id: the node's ID in the DHT, or a dictionary containing the
- node's id, host and port
- @type host: C{string}
- @param host: the IP address of the node
- (optional, but must be specified if id is not a dictionary)
- @type port: C{int}
- @param port: the port of the node
- (optional, but must be specified if id is not a dictionary)
- """
- self.fails = 0
- self.lastSeen = datetime(MINYEAR, 1, 1)
-
- # Alternate method, init Node from dictionary
- if isinstance(id, dict):
- host = id['host']
- port = id['port']
- id = id['id']
-
- assert isinstance(id, str)
- assert isinstance(host, str)
- self.id = id
- self.num = khash.intify(id)
- self.host = host
- self.port = int(port)
- self.token = ''
- self.num_values = 0
- self._contactInfo = None
-
- def updateLastSeen(self):
- """Updates the last contact time of the node and resets the number of failures."""
- self.lastSeen = datetime.now()
- self.fails = 0
-
- def updateToken(self, token):
- """Update the token for the node."""
- self.token = token
-
- def updateNumValues(self, num_values):
- """Update how many values the node has in the current search for a value."""
- self.num_values = num_values
-
- def msgFailed(self):
- """Log a failed attempt to contact this node.
-
- @rtype: C{int}
- @return: the number of consecutive failures this node has
- """
- self.fails = self.fails + 1
- return self.fails
-
- def contactInfo(self):
- """Get the compact contact info for the node."""
- if self._contactInfo is None:
- self._contactInfo = compact(self.id, self.host, self.port)
- return self._contactInfo
-
- def __repr__(self):
-        return repr((self.id, self.host, self.port))
-
- #{ Comparators to bisect/index a list of nodes with either a node or a long
- def __lt__(self, a):
- if type(a) == InstanceType:
- a = a.num
- return self.num < a
- def __le__(self, a):
- if type(a) == InstanceType:
- a = a.num
- return self.num <= a
- def __gt__(self, a):
- if type(a) == InstanceType:
- a = a.num
- return self.num > a
- def __ge__(self, a):
- if type(a) == InstanceType:
- a = a.num
- return self.num >= a
- def __eq__(self, a):
- if type(a) == InstanceType:
- a = a.num
- return self.num == a
- def __ne__(self, a):
- if type(a) == InstanceType:
- a = a.num
- return self.num != a
-
-
-class TestNode(unittest.TestCase):
- """Unit tests for the node implementation."""
- def setUp(self):
- self.node = Node(khash.newID(), '127.0.0.1', 2002)
- def testUpdateLastSeen(self):
- t = self.node.lastSeen
- self.node.updateLastSeen()
- self.failUnless(t < self.node.lastSeen)
-
\ No newline at end of file
+++ /dev/null
-## Copyright 2002-2003 Andrew Loewenstern, All Rights Reserved
-# see LICENSE.txt for license information
-
-"""Some utitlity functions for use in apt-p2p's khashmir DHT."""
-
-from twisted.trial import unittest
-
-def bucket_stats(l):
- """Given a list of khashmir instances, finds min, max, and average number of nodes in tables."""
- max = avg = 0
- min = None
- def count(buckets):
- c = 0
- for bucket in buckets:
- c = c + len(bucket.l)
- return c
- for node in l:
- c = count(node.table.buckets)
-        if min is None:
- min = c
- elif c < min:
- min = c
- if c > max:
- max = c
- avg = avg + c
- avg = avg / len(l)
- return {'min':min, 'max':max, 'avg':avg}
-
-def uncompact(s):
- """Extract the contact info from a compact node representation.
-
- @type s: C{string}
- @param s: the compact representation
- @rtype: C{dictionary}
- @return: the node ID, IP address and port to contact the node on
-    @raise ValueError: if the string is not a valid compact representation
- """
- if (len(s) != 26):
- raise ValueError
- id = s[:20]
- host = '.'.join([str(ord(i)) for i in s[20:24]])
- port = (ord(s[24]) << 8) | ord(s[25])
- return {'id': id, 'host': host, 'port': port}
-
-def compact(id, host, port):
- """Create a compact representation of node contact info.
-
- @type id: C{string}
- @param id: the node ID
- @type host: C{string}
- @param host: the IP address of the node
- @type port: C{int}
- @param port: the port number to contact the node on
- @rtype: C{string}
- @return: the compact representation
-    @raise ValueError: if the info cannot be encoded as a compact representation
- """
-
- s = id + ''.join([chr(int(i)) for i in host.split('.')]) + \
- chr((port & 0xFF00) >> 8) + chr(port & 0xFF)
- if len(s) != 26:
- raise ValueError
- return s
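-
-# Round-trip sketch of the 26-byte layout (20-byte ID, then 4 IP bytes, then
-# a 2-byte big-endian port); note 9977 == 0x26F9:
-#
-# >>> compact('\x00'*20, '127.0.0.1', 9977)[20:]
-# '\x7f\x00\x00\x01&\xf9'
-# >>> uncompact(compact('\x00'*20, '127.0.0.1', 9977))['port']
-# 9977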
-
-class TestUtil(unittest.TestCase):
- """Tests for the utilities."""
-
- timeout = 5
- myid = '\xca\xec\xb8\x0c\x00\xe7\x07\xf8~])\x8f\x9d\xe5_B\xff\x1a\xc4!'
- host = '165.234.1.34'
- port = 61234
-
- def test_compact(self):
- d = uncompact(compact(self.myid, self.host, self.port))
- self.failUnlessEqual(d['id'], self.myid)
- self.failUnlessEqual(d['host'], self.host)
- self.failUnlessEqual(d['port'], self.port)
-
\ No newline at end of file
--- /dev/null
+#
+# Copyright (C) 2002 Manuel Estrada Sainz <ranty@debian.org>
+# Copyright (C) 2008 Cameron Dale <camrdale@gmail.com>
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""Manage a mirror's index files.
+
+@type TRACKED_FILES: C{list} of C{string}
+@var TRACKED_FILES: the file names of files that contain index information
+"""
+
+# Disable the FutureWarning from the apt module
+import warnings
+warnings.simplefilter("ignore", FutureWarning)
+
+import os, shelve
+from random import choice
+from shutil import rmtree
+from copy import deepcopy
+from UserDict import DictMixin
+
+from twisted.internet import threads, defer, reactor
+from twisted.python import log
+from twisted.python.filepath import FilePath
+from twisted.trial import unittest
+
+import apt_pkg, apt_inst
+from apt import OpProgress
+from debian_bundle import deb822
+
+from Hash import HashObject
+
+apt_pkg.init()
+
+TRACKED_FILES = ['release', 'sources', 'packages']
+
+class PackageFileList(DictMixin):
+ """Manages a list of index files belonging to a mirror.
+
+ @type cache_dir: L{twisted.python.filepath.FilePath}
+ @ivar cache_dir: the directory to use for storing all files
+ @type packages: C{shelve dictionary}
+ @ivar packages: the files tracked for this mirror
+ """
+
+ def __init__(self, cache_dir):
+ """Initialize the list by opening the dictionary."""
+ self.cache_dir = cache_dir
+ self.cache_dir.restat(False)
+ if not self.cache_dir.exists():
+ self.cache_dir.makedirs()
+ self.packages = None
+ self.open()
+
+ def open(self):
+ """Open the persistent dictionary of files for this mirror."""
+ if self.packages is None:
+ self.packages = shelve.open(self.cache_dir.child('packages.db').path)
+
+ def close(self):
+ """Close the persistent dictionary."""
+ if self.packages is not None:
+ self.packages.close()
+
+ def update_file(self, cache_path, file_path):
+ """Check if an updated file needs to be tracked.
+
+ Called from the mirror manager when files get updated so we can update our
+ fake lists and sources.list.
+
+ @type cache_path: C{string}
+ @param cache_path: the location of the file within the mirror
+ @type file_path: L{twisted.python.filepath.FilePath}
+ @param file_path: The location of the file in the file system
+ @rtype: C{boolean}
+ @return: whether the file is an index file
+ """
+ filename = cache_path.split('/')[-1]
+ if filename.lower() in TRACKED_FILES:
+ log.msg("Registering package file: "+cache_path)
+ self.packages[cache_path] = file_path
+ return True
+ return False
+
+ def check_files(self):
+ """Check all files in the database to remove any that don't exist."""
+ files = self.packages.keys()
+ for f in files:
+ self.packages[f].restat(False)
+ if not self.packages[f].exists():
+ log.msg("File in packages database has been deleted: "+f)
+ del self.packages[f]
+
+ #{ Dictionary interface details
+ def __getitem__(self, key): return self.packages[key]
+ def __setitem__(self, key, item): self.packages[key] = item
+ def __delitem__(self, key): del self.packages[key]
+ def keys(self): return self.packages.keys()
+
+class AptPackages:
+ """Answers queries about packages available from a mirror.
+
+ Uses the python-apt tools to parse and provide information about the
+ files that are available on a single mirror.
+
+ @ivar DEFAULT_APT_CONFIG: the default configuration parameters to use for apt
+ @ivar essential_dirs: directories that must be created for apt to work
+ @ivar essential_files: files that must be created for apt to work
+ @type cache_dir: L{twisted.python.filepath.FilePath}
+ @ivar cache_dir: the directory to use for storing all files
+ @type unload_delay: C{int}
+ @ivar unload_delay: the time to wait before unloading the apt cache
+ @ivar apt_config: the configuration parameters to use for apt
+ @type packages: L{PackageFileList}
+ @ivar packages: the persistent storage of tracked apt index files
+ @type loaded: C{boolean}
+ @ivar loaded: whether the apt cache is currently loaded
+ @type loading: L{twisted.internet.defer.Deferred}
+ @ivar loading: if the cache is currently being loaded, this will be
+ called when it is loaded, otherwise it is None
+ @type unload_later: L{twisted.internet.interfaces.IDelayedCall}
+ @ivar unload_later: the delayed call to unload the apt cache
+ @type indexrecords: C{dictionary}
+ @ivar indexrecords: the hashes of index files for the mirror, keys are
+ mirror directories, values are dictionaries with keys the path to the
+ index file in the mirror directory and values are dictionaries with
+ keys the hash type and values the hash
+ @type cache: C{apt_pkg.GetCache()}
+ @ivar cache: the apt cache of the mirror
+ @type records: C{apt_pkg.GetPkgRecords()}
+ @ivar records: the apt package records for all binary packages in a mirror
+ @type srcrecords: C{apt_pkg.GetPkgSrcRecords}
+ @ivar srcrecords: the apt package records for all source packages in a mirror
+ """
+
+ DEFAULT_APT_CONFIG = {
+ #'APT' : '',
+ #'APT::Architecture' : 'i386', # Commented so the machine's config will set this
+ #'APT::Default-Release' : 'unstable',
+ 'Dir':'.', # /
+ 'Dir::State' : 'apt/', # var/lib/apt/
+ 'Dir::State::Lists': 'lists/', # lists/
+ #'Dir::State::cdroms' : 'cdroms.list',
+ 'Dir::State::userstatus' : 'status.user',
+ 'Dir::State::status': 'dpkg/status', # '/var/lib/dpkg/status'
+ 'Dir::Cache' : '.apt/cache/', # var/cache/apt/
+ #'Dir::Cache::archives' : 'archives/',
+ 'Dir::Cache::srcpkgcache' : 'srcpkgcache.bin',
+ 'Dir::Cache::pkgcache' : 'pkgcache.bin',
+ 'Dir::Etc' : 'apt/etc/', # etc/apt/
+ 'Dir::Etc::sourcelist' : 'sources.list',
+ 'Dir::Etc::vendorlist' : 'vendors.list',
+ 'Dir::Etc::vendorparts' : 'vendors.list.d',
+ #'Dir::Etc::main' : 'apt.conf',
+ #'Dir::Etc::parts' : 'apt.conf.d',
+ #'Dir::Etc::preferences' : 'preferences',
+ 'Dir::Bin' : '',
+ #'Dir::Bin::methods' : '', #'/usr/lib/apt/methods'
+ 'Dir::Bin::dpkg' : '/usr/bin/dpkg',
+ #'DPkg' : '',
+ #'DPkg::Pre-Install-Pkgs' : '',
+ #'DPkg::Tools' : '',
+ #'DPkg::Tools::Options' : '',
+ #'DPkg::Tools::Options::/usr/bin/apt-listchanges' : '',
+ #'DPkg::Tools::Options::/usr/bin/apt-listchanges::Version' : '2',
+ #'DPkg::Post-Invoke' : '',
+ }
+ essential_dirs = ('apt', 'apt/cache', 'apt/dpkg', 'apt/etc', 'apt/lists',
+ 'apt/lists/partial')
+ essential_files = ('apt/dpkg/status', 'apt/etc/sources.list',)
+
+ def __init__(self, cache_dir, unload_delay):
+ """Construct a new packages manager.
+
+        @param cache_dir: directory to use to store files for this mirror
+        @param unload_delay: the time to wait before unloading the apt cache
+        """
+ self.cache_dir = cache_dir
+ self.unload_delay = unload_delay
+ self.apt_config = deepcopy(self.DEFAULT_APT_CONFIG)
+
+ # Create the necessary files and directories for apt
+ for dir in self.essential_dirs:
+ path = self.cache_dir.preauthChild(dir)
+ if not path.exists():
+ path.makedirs()
+ for file in self.essential_files:
+ path = self.cache_dir.preauthChild(file)
+ if not path.exists():
+ path.touch()
+
+ self.apt_config['Dir'] = self.cache_dir.path
+ self.apt_config['Dir::State::status'] = self.cache_dir.preauthChild(self.apt_config['Dir::State']).preauthChild(self.apt_config['Dir::State::status']).path
+ self.packages = PackageFileList(cache_dir)
+ self.loaded = False
+ self.loading = None
+ self.unload_later = None
+
+ def __del__(self):
+ self.cleanup()
+
+ def addRelease(self, cache_path, file_path):
+ """Add a Release file's info to the list of index files.
+
+ Dirty hack until python-apt supports apt-pkg/indexrecords.h
+ (see Bug #456141)
+ """
+ self.indexrecords[cache_path] = {}
+
+ f = file_path.open('r')
+
+ # Use python-debian routines to parse the file for hashes
+ rel = deb822.Release(f, fields = ['MD5Sum', 'SHA1', 'SHA256'])
+ for hash_type in rel:
+ for file in rel[hash_type]:
+ self.indexrecords[cache_path].setdefault(file['name'], {})[hash_type.upper()] = (file[hash_type], file['size'])
+
+ f.close()
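+
+    # Resulting structure (sketch, values assumed): indexrecords maps the
+    # Release file's path to per-index-file hash info, e.g.
+    #
+    #   indexrecords['/dists/stable/Release'] = {
+    #       'main/binary-i386/Packages.bz2': {
+    #           'SHA1': ('6025...', '1549811'),    # (hash, size)
+    #       },
+    #   }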
+
+ def file_updated(self, cache_path, file_path):
+ """A file in the mirror has changed or been added.
+
+ If this affects us, unload our apt database.
+ @see: L{PackageFileList.update_file}
+ """
+ if self.packages.update_file(cache_path, file_path):
+ self.unload()
+
+ def load(self):
+ """Make sure the package cache is initialized and loaded."""
+ # Reset the pending unload call
+ if self.unload_later and self.unload_later.active():
+ self.unload_later.reset(self.unload_delay)
+ else:
+ self.unload_later = reactor.callLater(self.unload_delay, self.unload)
+
+ # Make sure it's not already being loaded
+ if self.loading is None:
+ log.msg('Loading the packages cache')
+ self.loading = threads.deferToThread(self._load)
+ self.loading.addCallback(self.doneLoading)
+ return self.loading
+
+ def doneLoading(self, loadResult):
+ """Cache is loaded."""
+ self.loading = None
+ # Must pass on the result for the next callback
+ return loadResult
+
+ def _load(self):
+ """Regenerates the fake configuration and loads the packages caches."""
+ if self.loaded: return True
+
+ # Modify the default configuration to create the fake one.
+ apt_pkg.InitSystem()
+ self.cache_dir.preauthChild(self.apt_config['Dir::State']
+ ).preauthChild(self.apt_config['Dir::State::Lists']).remove()
+ self.cache_dir.preauthChild(self.apt_config['Dir::State']
+ ).preauthChild(self.apt_config['Dir::State::Lists']
+ ).child('partial').makedirs()
+ sources_file = self.cache_dir.preauthChild(self.apt_config['Dir::Etc']
+ ).preauthChild(self.apt_config['Dir::Etc::sourcelist'])
+ sources = sources_file.open('w')
+ sources_count = 0
+ deb_src_added = False
+ self.packages.check_files()
+ self.indexrecords = {}
+
+ # Create an entry in sources.list for each needed index file
+ for f in self.packages:
+ # we should probably clear old entries from self.packages and
+            # take into account the recorded mtime as an optimization
+ file = self.packages[f]
+ if f.split('/')[-1] == "Release":
+ self.addRelease(f, file)
+ fake_uri='http://apt-p2p'+f
+ fake_dirname = '/'.join(fake_uri.split('/')[:-1])
+ if f.endswith('Sources'):
+ deb_src_added = True
+ source_line='deb-src '+fake_dirname+'/ /'
+ else:
+ source_line='deb '+fake_dirname+'/ /'
+ listpath = self.cache_dir.preauthChild(self.apt_config['Dir::State']
+ ).preauthChild(self.apt_config['Dir::State::Lists']
+ ).child(apt_pkg.URItoFileName(fake_uri))
+ sources.write(source_line+'\n')
+ log.msg("Sources line: " + source_line)
+ sources_count = sources_count + 1
+
+ if listpath.exists():
+ #we should empty the directory instead
+ listpath.remove()
+ os.symlink(file.path, listpath.path)
+ sources.close()
+
+ if sources_count == 0:
+            log.msg("No Packages files available for %s backend" % self.cache_dir.path)
+ return False
+
+ log.msg("Loading Packages database for "+self.cache_dir.path)
+ for key, value in self.apt_config.items():
+ apt_pkg.Config[key] = value
+
+ self.cache = apt_pkg.GetCache(OpProgress())
+ self.records = apt_pkg.GetPkgRecords(self.cache)
+ if deb_src_added:
+ self.srcrecords = apt_pkg.GetPkgSrcRecords()
+ else:
+ self.srcrecords = None
+
+ self.loaded = True
+ return True
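+
+    # Example of a generated sources.list line (path assumed): an index file
+    # tracked at /dists/stable/main/binary-i386/Packages becomes
+    #
+    #   deb http://apt-p2p/dists/stable/main/binary-i386/ /
+    #
+    # with its lists/ entry symlinked to the real downloaded file.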
+
+ def unload(self):
+        """Unload the apt caches to free up their memory."""
+ if self.unload_later and self.unload_later.active():
+ self.unload_later.cancel()
+ self.unload_later = None
+ if self.loaded:
+ log.msg('Unloading the packages cache')
+ # This should save memory
+ del self.cache
+ del self.records
+ del self.srcrecords
+ del self.indexrecords
+ self.loaded = False
+
+ def cleanup(self):
+ """Cleanup and close any loaded caches."""
+ self.unload()
+ if self.unload_later and self.unload_later.active():
+ self.unload_later.cancel()
+ self.packages.close()
+
+ def findHash(self, path):
+ """Find the hash for a given path in this mirror.
+
+ @type path: C{string}
+ @param path: the path within the mirror of the file to lookup
+ @rtype: L{twisted.internet.defer.Deferred}
+ @return: a deferred so it can make sure the cache is loaded first
+ """
+ d = defer.Deferred()
+
+ deferLoad = self.load()
+ deferLoad.addCallback(self._findHash, path, d)
+ deferLoad.addErrback(self._findHash_error, path, d)
+
+ return d
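+
+    # Usage sketch (apt_packages and the path are assumed names): the
+    # Deferred always fires with a HashObject, which is empty if no hash
+    # was found.
+    #
+    # >>> d = apt_packages.findHash('/dists/stable/main/binary-i386/Packages.bz2')
+    # >>> d.addCallback(lambda h: h.hexexpected())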
+
+ def _findHash_error(self, failure, path, d):
+ """An error occurred, return an empty hash."""
+ log.msg('An error occurred while looking up a hash for: %s' % path)
+ log.err(failure)
+ d.callback(HashObject())
+ return failure
+
+ def _findHash(self, loadResult, path, d):
+ """Search the records for the hash of a path.
+
+ @type loadResult: C{boolean}
+ @param loadResult: whether apt's cache was successfully loaded
+ @type path: C{string}
+ @param path: the path within the mirror of the file to lookup
+ @type d: L{twisted.internet.defer.Deferred}
+ @param d: the deferred to callback with the result
+ """
+ if not loadResult:
+ d.callback(HashObject())
+ return loadResult
+
+ h = HashObject()
+
+ # First look for the path in the cache of index files
+ for release in self.indexrecords:
+ if path.startswith(release[:-7]):
+ for indexFile in self.indexrecords[release]:
+ if release[:-7] + indexFile == path:
+ h.setFromIndexRecord(self.indexrecords[release][indexFile])
+ d.callback(h)
+ return loadResult
+
+ package = path.split('/')[-1].split('_')[0]
+
+ # Check the binary packages
+ try:
+ for version in self.cache[package].VersionList:
+ size = version.Size
+ for verFile in version.FileList:
+ if self.records.Lookup(verFile):
+ if '/' + self.records.FileName == path:
+ h.setFromPkgRecord(self.records, size)
+ d.callback(h)
+ return loadResult
+ except KeyError:
+ pass
+
+ # Check the source packages' files
+ if self.srcrecords:
+ self.srcrecords.Restart()
+ if self.srcrecords.Lookup(package):
+ for f in self.srcrecords.Files:
+ if path == '/' + f[2]:
+ h.setFromSrcRecord(f)
+ d.callback(h)
+ return loadResult
+
+ d.callback(h)
+
+ # Have to pass the returned loadResult on in case other calls to this function are pending.
+ return loadResult
+
+class TestAptPackages(unittest.TestCase):
+ """Unit tests for the AptPackages cache."""
+
+ pending_calls = []
+ client = None
+ timeout = 10
+ packagesFile = ''
+ sourcesFile = ''
+ releaseFile = ''
+
+ def setUp(self):
+ """Initializes the cache with files found in the traditional apt location."""
+ self.client = AptPackages(FilePath('/tmp/.apt-p2p'), 300)
+
+ # Find the largest index files that are for 'main'
+ self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n')
+ self.sourcesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').read().rstrip('\n')
+
+ # Find the Release file corresponding to the found Packages file
+ for f in os.walk('/var/lib/apt/lists').next()[2]:
+ if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
+ self.releaseFile = f
+ break
+
+ # Add all the found files to the PackageFileList
+ self.client.file_updated(self.releaseFile[self.releaseFile.find('_dists_'):].replace('_','/'),
+ FilePath('/var/lib/apt/lists/' + self.releaseFile))
+ self.client.file_updated(self.packagesFile[self.packagesFile.find('_dists_'):].replace('_','/'),
+ FilePath('/var/lib/apt/lists/' + self.packagesFile))
+ self.client.file_updated(self.sourcesFile[self.sourcesFile.find('_dists_'):].replace('_','/'),
+ FilePath('/var/lib/apt/lists/' + self.sourcesFile))
+
+ def test_pkg_hash(self):
+ """Tests loading the binary package records cache."""
+ self.client._load()
+
+ self.client.records.Lookup(self.client.cache['dpkg'].VersionList[0].FileList[0])
+
+ pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
+ '/var/lib/apt/lists/' + self.packagesFile +
+ ' | grep -E "^SHA1:" | head -n 1' +
+ ' | cut -d\ -f 2').read().rstrip('\n')
+
+ self.failUnless(self.client.records.SHA1Hash == pkg_hash,
+ "Hashes don't match: %s != %s" % (self.client.records.SHA1Hash, pkg_hash))
+
+ def test_src_hash(self):
+ """Tests loading the source package records cache."""
+ self.client._load()
+
+ self.client.srcrecords.Lookup('dpkg')
+
+ src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
+ '/var/lib/apt/lists/' + self.sourcesFile +
+ ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
+ ' | cut -d\ -f 2').read().split('\n')[:-1]
+
+ for f in self.client.srcrecords.Files:
+ self.failUnless(f[0] in src_hashes, "Couldn't find %s in: %r" % (f[0], src_hashes))
+
+ def test_index_hash(self):
+ """Tests loading the cache of index file information."""
+ self.client._load()
+
+ indexhash = self.client.indexrecords[self.releaseFile[self.releaseFile.find('_dists_'):].replace('_','/')]['main/binary-i386/Packages.bz2']['SHA1'][0]
+
+ idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
+ '/var/lib/apt/lists/' + self.releaseFile +
+ ' | grep -E " main/binary-i386/Packages.bz2$"'
+ ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
+
+ self.failUnless(indexhash == idx_hash, "Hashes don't match: %s != %s" % (indexhash, idx_hash))
+
+ def verifyHash(self, found_hash, path, true_hash):
+ self.failUnless(found_hash.hexexpected() == true_hash,
+ "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))
+
+ def test_findIndexHash(self):
+ """Tests finding the hash of a single index file."""
+ lastDefer = defer.Deferred()
+
+ idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
+ '/var/lib/apt/lists/' + self.releaseFile +
+ ' | grep -E " main/binary-i386/Packages.bz2$"'
+ ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
+ idx_path = '/' + self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2'
+
+ d = self.client.findHash(idx_path)
+ d.addCallback(self.verifyHash, idx_path, idx_hash)
+
+ d.addBoth(lastDefer.callback)
+ return lastDefer
+
+ def test_findPkgHash(self):
+ """Tests finding the hash of a single binary package."""
+ lastDefer = defer.Deferred()
+
+ pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
+ '/var/lib/apt/lists/' + self.packagesFile +
+ ' | grep -E "^SHA1:" | head -n 1' +
+ ' | cut -d\ -f 2').read().rstrip('\n')
+ pkg_path = '/' + os.popen('grep -A 30 -E "^Package: dpkg$" ' +
+ '/var/lib/apt/lists/' + self.packagesFile +
+ ' | grep -E "^Filename:" | head -n 1' +
+ ' | cut -d\ -f 2').read().rstrip('\n')
+
+ d = self.client.findHash(pkg_path)
+ d.addCallback(self.verifyHash, pkg_path, pkg_hash)
+
+ d.addBoth(lastDefer.callback)
+ return lastDefer
+
+ def test_findSrcHash(self):
+ """Tests finding the hash of a single source package."""
+ lastDefer = defer.Deferred()
+
+ src_dir = '/' + os.popen('grep -A 30 -E "^Package: dpkg$" ' +
+ '/var/lib/apt/lists/' + self.sourcesFile +
+ ' | grep -E "^Directory:" | head -n 1' +
+ ' | cut -d\ -f 2').read().rstrip('\n')
+ src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
+ '/var/lib/apt/lists/' + self.sourcesFile +
+ ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
+ ' | cut -d\ -f 2').read().split('\n')[:-1]
+ src_paths = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
+ '/var/lib/apt/lists/' + self.sourcesFile +
+ ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
+ ' | cut -d\ -f 4').read().split('\n')[:-1]
+
+ i = choice(range(len(src_hashes)))
+ d = self.client.findHash(src_dir + '/' + src_paths[i])
+ d.addCallback(self.verifyHash, src_dir + '/' + src_paths[i], src_hashes[i])
+
+ d.addBoth(lastDefer.callback)
+ return lastDefer
+
+ def test_multipleFindHash(self):
+ """Tests finding the hash of an index file, binary package, source package, and another index file."""
+ lastDefer = defer.Deferred()
+
+ # Lookup a Packages.bz2 file
+ idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
+ '/var/lib/apt/lists/' + self.releaseFile +
+ ' | grep -E " main/binary-i386/Packages.bz2$"'
+ ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
+ idx_path = '/' + self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2'
+
+ d = self.client.findHash(idx_path)
+ d.addCallback(self.verifyHash, idx_path, idx_hash)
+
+ # Lookup the binary 'dpkg' package
+ pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
+ '/var/lib/apt/lists/' + self.packagesFile +
+ ' | grep -E "^SHA1:" | head -n 1' +
+ ' | cut -d\ -f 2').read().rstrip('\n')
+ pkg_path = '/' + os.popen('grep -A 30 -E "^Package: dpkg$" ' +
+ '/var/lib/apt/lists/' + self.packagesFile +
+ ' | grep -E "^Filename:" | head -n 1' +
+ ' | cut -d\ -f 2').read().rstrip('\n')
+
+ d = self.client.findHash(pkg_path)
+ d.addCallback(self.verifyHash, pkg_path, pkg_hash)
+
+ # Lookup the source 'dpkg' package
+ src_dir = '/' + os.popen('grep -A 30 -E "^Package: dpkg$" ' +
+ '/var/lib/apt/lists/' + self.sourcesFile +
+ ' | grep -E "^Directory:" | head -n 1' +
+ ' | cut -d\ -f 2').read().rstrip('\n')
+ src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
+ '/var/lib/apt/lists/' + self.sourcesFile +
+ ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
+ ' | cut -d\ -f 2').read().split('\n')[:-1]
+ src_paths = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
+ '/var/lib/apt/lists/' + self.sourcesFile +
+ ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
+ ' | cut -d\ -f 4').read().split('\n')[:-1]
+
+ for i in range(len(src_hashes)):
+ d = self.client.findHash(src_dir + '/' + src_paths[i])
+ d.addCallback(self.verifyHash, src_dir + '/' + src_paths[i], src_hashes[i])
+
+ # Lookup a Sources.bz2 file
+ idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
+ '/var/lib/apt/lists/' + self.releaseFile +
+ ' | grep -E " main/source/Sources.bz2$"'
+ ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
+ idx_path = '/' + self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')[:-7] + 'main/source/Sources.bz2'
+
+ d = self.client.findHash(idx_path)
+ d.addCallback(self.verifyHash, idx_path, idx_hash)
+
+ d.addBoth(lastDefer.callback)
+ return lastDefer
+
+ def tearDown(self):
+ for p in self.pending_calls:
+ if p.active():
+ p.cancel()
+ self.pending_calls = []
+ self.client.cleanup()
+ self.client = None
--- /dev/null
+
+"""Manage a cache of downloaded files.
+
+@var DECOMPRESS_EXTS: a list of file extensions that need to be decompressed
+@var DECOMPRESS_FILES: a list of file names that need to be decompressed
+"""
+
+from bz2 import BZ2Decompressor
+from zlib import decompressobj, MAX_WBITS
+from gzip import FCOMMENT, FEXTRA, FHCRC, FNAME, FTEXT
+from urlparse import urlparse
+import os
+
+from twisted.python import log
+from twisted.python.filepath import FilePath
+from twisted.internet import defer, reactor
+from twisted.trial import unittest
+from twisted.web2 import stream
+from twisted.web2.http import splitHostPort
+
+from Hash import HashObject
+
+DECOMPRESS_EXTS = ['.gz', '.bz2']
+DECOMPRESS_FILES = ['release', 'sources', 'packages']
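+# For example, a downloaded 'Packages.bz2' matches both lists (root
+# 'packages' after lowercasing, extension '.bz2') and so is decompressed to
+# a sibling 'Packages' file, while a '.deb' package is stored as-is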
+
+class ProxyFileStream(stream.SimpleStream):
+ """Saves a stream to a file while providing a new stream.
+
+ Also optionally decompresses the file while it is being downloaded.
+
+ @type stream: L{twisted.web2.stream.IByteStream}
+ @ivar stream: the input stream being read
+ @type outFile: L{twisted.python.filepath.FilePath}
+ @ivar outFile: the file being written
+ @type hash: L{Hash.HashObject}
+ @ivar hash: the hash object for the file
+ @type gzfile: C{file}
+ @ivar gzfile: the open file to write decompressed gzip data to
+ @type gzdec: L{zlib.decompressobj}
+ @ivar gzdec: the decompressor to use for the compressed gzip data
+ @type gzheader: C{boolean}
+ @ivar gzheader: whether the gzip header still needs to be removed from
+ the zlib compressed data
+ @type bz2file: C{file}
+ @ivar bz2file: the open file to write decompressed bz2 data to
+ @type bz2dec: L{bz2.BZ2Decompressor}
+ @ivar bz2dec: the decompressor to use for the compressed bz2 data
+ @type length: C{int}
+ @ivar length: the length of the original (compressed) file
+ @type doneDefer: L{twisted.internet.defer.Deferred}
+ @ivar doneDefer: the deferred that will fire when done streaming
+
+ @group Stream implementation: read, close
+
+ """
+
+ def __init__(self, stream, outFile, hash, decompress = None, decFile = None):
+ """Initializes the proxy.
+
+ @type stream: L{twisted.web2.stream.IByteStream}
+ @param stream: the input stream to read from
+ @type outFile: L{twisted.python.filepath.FilePath}
+ @param outFile: the file to write to
+ @type hash: L{Hash.HashObject}
+ @param hash: the hash object to use for the file
+ @type decompress: C{string}
+ @param decompress: also decompress the file as this type
+ (currently only '.gz' and '.bz2' are supported)
+ @type decFile: C{twisted.python.FilePath}
+ @param decFile: the file to write the decompressed data to
+ """
+ self.stream = stream
+ self.outFile = outFile.open('w')
+ self.hash = hash
+ self.hash.new()
+ self.gzfile = None
+ self.bz2file = None
+ if decompress == ".gz":
+ self.gzheader = True
+ self.gzfile = decFile.open('w')
+ self.gzdec = decompressobj(-MAX_WBITS)
+ elif decompress == ".bz2":
+ self.bz2file = decFile.open('w')
+ self.bz2dec = BZ2Decompressor()
+ self.length = self.stream.length
+ self.doneDefer = defer.Deferred()
+
+ def _done(self):
+ """Close all the output files, return the result."""
+ if not self.outFile.closed:
+ self.outFile.close()
+ self.hash.digest()
+ if self.gzfile:
+ # Finish the decompression
+ data_dec = self.gzdec.flush()
+ self.gzfile.write(data_dec)
+ self.gzfile.close()
+ self.gzfile = None
+ if self.bz2file:
+ self.bz2file.close()
+ self.bz2file = None
+
+ self.doneDefer.callback(self.hash)
+
+ def read(self):
+ """Read some data from the stream."""
+ if self.outFile.closed:
+ return None
+
+ # Read data from the stream, deal with the possible deferred
+ data = self.stream.read()
+ if isinstance(data, defer.Deferred):
+ data.addCallbacks(self._write, self._done)
+ return data
+
+ self._write(data)
+ return data
+
+ def _write(self, data):
+ """Write the stream data to the file and return it for others to use.
+
+ Also optionally decompresses it.
+ """
+ if data is None:
+ self._done()
+ return data
+
+ # Write and hash the streamed data
+ self.outFile.write(data)
+ self.hash.update(data)
+
+ if self.gzfile:
+ # Decompress the zlib portion of the file
+ if self.gzheader:
+ # Remove the gzip header junk
+ self.gzheader = False
+ new_data = self._remove_gzip_header(data)
+ dec_data = self.gzdec.decompress(new_data)
+ else:
+ dec_data = self.gzdec.decompress(data)
+ self.gzfile.write(dec_data)
+ if self.bz2file:
+ # Decompress the bz2 file
+ dec_data = self.bz2dec.decompress(data)
+ self.bz2file.write(dec_data)
+
+ return data
+
+ def _remove_gzip_header(self, data):
+ """Remove the gzip header from the zlib compressed data."""
+ # Read, check & discard the header fields
+ if data[:2] != '\037\213':
+ raise IOError, 'Not a gzipped file'
+ if ord(data[2]) != 8:
+ raise IOError, 'Unknown compression method'
+ flag = ord(data[3])
+ # The modtime (4 bytes), extra flags (1 byte) and OS (1 byte) fields
+ # are not needed, so they are skipped as part of the fixed 10-byte header
+
+ skip = 10
+ if flag & FEXTRA:
+ # Read & discard the extra field
+ xlen = ord(data[10])
+ xlen = xlen + 256*ord(data[11])
+ skip = skip + 2 + xlen
+ if flag & FNAME:
+ # Read and discard a null-terminated string containing the filename
+ while True:
+ if not data[skip] or data[skip] == '\000':
+ break
+ skip += 1
+ skip += 1
+ if flag & FCOMMENT:
+ # Read and discard a null-terminated string containing a comment
+ while True:
+ if not data[skip] or data[skip] == '\000':
+ break
+ skip += 1
+ skip += 1
+ if flag & FHCRC:
+ skip += 2 # Read & discard the 16-bit header CRC
+
+ return data[skip:]
+
+ def close(self):
+ """Clean everything up and return None to future reads."""
+ self.length = 0
+ self._done()
+ self.stream.close()
+
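+# A minimal usage sketch (hypothetical paths; the real wiring is in
+# CacheManager.save_file below): wrap a response stream so the bytes are
+# cached, hashed and optionally decompressed while still being streamed to
+# the original requester:
+#
+#   hash = HashObject()
+#   proxy = ProxyFileStream(response.stream, FilePath('/tmp/Packages.bz2'),
+#                           hash, '.bz2', FilePath('/tmp/Packages'))
+#   response.stream = proxy
+#   proxy.doneDefer.addCallback(lambda h: log.msg(h.hexdigest()))
+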
+class CacheManager:
+ """Manages all downloaded files and requests for cached objects.
+
+ @type cache_dir: L{twisted.python.filepath.FilePath}
+ @ivar cache_dir: the directory to use for storing all files
+ @type other_dirs: C{list} of L{twisted.python.filepath.FilePath}
+ @ivar other_dirs: the other directories that have shared files in them
+ @type all_dirs: C{list} of L{twisted.python.filepath.FilePath}
+ @ivar all_dirs: all the directories that have cached files in them
+ @type db: L{db.DB}
+ @ivar db: the database to use for tracking files and hashes
+ @type manager: L{apt_p2p.AptP2P}
+ @ivar manager: the main program object to send requests to
+ @type scanning: C{list} of L{twisted.python.filepath.FilePath}
+ @ivar scanning: all the directories that are currently being scanned or waiting to be scanned
+ """
+
+ def __init__(self, cache_dir, db, other_dirs = [], manager = None):
+ """Initialize the instance and remove any untracked files from the DB..
+
+ @type cache_dir: L{twisted.python.filepath.FilePath}
+ @param cache_dir: the directory to use for storing all files
+ @type db: L{db.DB}
+ @param db: the database to use for tracking files and hashes
+ @type other_dirs: C{list} of L{twisted.python.filepath.FilePath}
+ @param other_dirs: the other directories that have shared files in them
+ (optional, defaults to only using the cache directory)
+ @type manager: L{apt_p2p.AptP2P}
+ @param manager: the main program object to send requests to
+ (optional, defaults to not calling back with cached files)
+ """
+ self.cache_dir = cache_dir
+ self.other_dirs = other_dirs
+ self.all_dirs = self.other_dirs[:]
+ self.all_dirs.insert(0, self.cache_dir)
+ self.db = db
+ self.manager = manager
+ self.scanning = []
+
+ # Init the database, remove old files
+ self.db.removeUntrackedFiles(self.all_dirs)
+
+ #{ Scanning directories
+ def scanDirectories(self):
+ """Scan the cache directories, hashing new and rehashing changed files."""
+ assert not self.scanning, "a directory scan is already under way"
+ self.scanning = self.all_dirs[:]
+ self._scanDirectories()
+
+ def _scanDirectories(self, result = None, walker = None):
+ """Walk each directory looking for cached files.
+
+ @param result: the result of a DHT store request, not used (optional)
+ @param walker: the walker to use to traverse the current directory
+ (optional, defaults to creating a new walker from the first
+ directory in the L{CacheManager.scanning} list)
+ """
+ # Need to start walking a new directory
+ if walker is None:
+ # If there are any left, get them
+ if self.scanning:
+ log.msg('started scanning directory: %s' % self.scanning[0].path)
+ walker = self.scanning[0].walk()
+ else:
+ log.msg('cache directory scan complete')
+ return
+
+ try:
+ # Get the next file in the directory
+ file = walker.next()
+ except StopIteration:
+ # No files left, go to the next directory
+ log.msg('done scanning directory: %s' % self.scanning[0].path)
+ self.scanning.pop(0)
+ reactor.callLater(0, self._scanDirectories)
+ return
+
+ # If it's not a file ignore it
+ if not file.isfile():
+ log.msg('entering directory: %s' % file.path)
+ reactor.callLater(0, self._scanDirectories, None, walker)
+ return
+
+ # If it's already properly in the DB, ignore it
+ db_status = self.db.isUnchanged(file)
+ if db_status:
+ log.msg('file is unchanged: %s' % file.path)
+ reactor.callLater(0, self._scanDirectories, None, walker)
+ return
+
+ # Don't hash files in the cache that are not in the DB
+ if self.scanning[0] == self.cache_dir:
+ if db_status is None:
+ log.msg('ignoring unknown cache file: %s' % file.path)
+ else:
+ log.msg('removing changed cache file: %s' % file.path)
+ file.remove()
+ reactor.callLater(0, self._scanDirectories, None, walker)
+ return
+
+ # Otherwise hash it
+ log.msg('start hash checking file: %s' % file.path)
+ hash = HashObject()
+ df = hash.hashInThread(file)
+ df.addBoth(self._doneHashing, file, walker)
+ df.addErrback(log.err)
+
+ def _doneHashing(self, result, file, walker):
+ """If successful, add the hashed file to the DB and inform the main program."""
+ if isinstance(result, HashObject):
+ log.msg('hash check of %s completed with hash: %s' % (file.path, result.hexdigest()))
+
+ # Only set a URL if this is a downloaded file
+ url = None
+ if self.scanning[0] == self.cache_dir:
+ url = 'http:/' + file.path[len(self.cache_dir.path):]
+
+ # Store the hashed file in the database
+ new_hash = self.db.storeFile(file, result.digest())
+
+ # Tell the main program to handle the new cache file
+ df = self.manager.new_cached_file(file, result, new_hash, url, True)
+ if df is None:
+ reactor.callLater(0, self._scanDirectories, None, walker)
+ else:
+ df.addBoth(self._scanDirectories, walker)
+ else:
+ # Must have returned an error
+ log.msg('hash check of %s failed' % file.path)
+ log.err(result)
+ reactor.callLater(0, self._scanDirectories, None, walker)
+
+ #{ Downloading files
+ def save_file(self, response, hash, url):
+ """Save a downloaded file to the cache and stream it.
+
+ @type response: L{twisted.web2.http.Response}
+ @param response: the response from the download
+ @type hash: L{Hash.HashObject}
+ @param hash: the hash object containing the expected hash for the file
+ @param url: the URI of the actual mirror request
+ @rtype: L{twisted.web2.http.Response}
+ @return: the final response from the download
+ """
+ if response.code != 200:
+ log.msg('File was not found (%r): %s' % (response, url))
+ return response
+
+ log.msg('Returning file: %s' % url)
+
+ # Set the destination path for the file
+ parsed = urlparse(url)
+ destFile = self.cache_dir.preauthChild(parsed[1] + parsed[2])
+ log.msg('Saving returned %r byte file to cache: %s' % (response.stream.length, destFile.path))
+
+ # Make sure there's a free place for the file
+ if destFile.exists():
+ log.msg('File already exists, removing: %s' % destFile.path)
+ destFile.remove()
+ elif not destFile.parent().exists():
+ destFile.parent().makedirs()
+
+ # Determine whether it needs to be decompressed and how
+ root, ext = os.path.splitext(destFile.basename())
+ if root.lower() in DECOMPRESS_FILES and ext.lower() in DECOMPRESS_EXTS:
+ ext = ext.lower()
+ decFile = destFile.sibling(root)
+ log.msg('Decompressing to: %s' % decFile.path)
+ if decFile.exists():
+ log.msg('File already exists, removing: %s' % decFile.path)
+ decFile.remove()
+ else:
+ ext = None
+ decFile = None
+
+ # Create the new stream from the old one.
+ orig_stream = response.stream
+ response.stream = ProxyFileStream(orig_stream, destFile, hash, ext, decFile)
+ response.stream.doneDefer.addCallback(self._save_complete, url, destFile,
+ response.headers.getHeader('Last-Modified'),
+ decFile)
+ response.stream.doneDefer.addErrback(self.save_error, url)
+
+ # Return the modified response with the new stream
+ return response
+
+ def _save_complete(self, hash, url, destFile, modtime = None, decFile = None):
+ """Update the modification time and inform the main program.
+
+ @type hash: L{Hash.HashObject}
+ @param hash: the hash object containing the expected hash for the file
+ @param url: the URI of the actual mirror request
+ @type destFile: C{twisted.python.FilePath}
+ @param destFile: the file where the download was written to
+ @type modtime: C{int}
+ @param modtime: the modified time of the cached file (seconds since epoch)
+ (optional, defaults to not setting the modification time of the file)
+ @type decFile: C{twisted.python.FilePath}
+ @param decFile: the file where the decompressed download was written to
+ (optional, defaults to the file not having been compressed)
+ """
+ if modtime:
+ os.utime(destFile.path, (modtime, modtime))
+ if decFile:
+ os.utime(decFile.path, (modtime, modtime))
+
+ result = hash.verify()
+ if result or result is None:
+ if result:
+ log.msg('Hashes match: %s' % url)
+ else:
+ log.msg('Hashed file to %s: %s' % (hash.hexdigest(), url))
+
+ new_hash = self.db.storeFile(destFile, hash.digest())
+ log.msg('now available: %s' % url)
+
+ if self.manager:
+ self.manager.new_cached_file(destFile, hash, new_hash, url)
+ if decFile:
+ ext_len = len(destFile.path) - len(decFile.path)
+ self.manager.new_cached_file(decFile, None, False, url[:-ext_len])
+ else:
+ log.msg("Hashes don't match %s != %s: %s" % (hash.hexexpected(), hash.hexdigest(), url))
+ destFile.remove()
+ if decFile:
+ decFile.remove()
+
+ def save_error(self, failure, url):
+ """An error has occurred in downloadign or saving the file."""
+ log.msg('Error occurred downloading %s' % url)
+ log.err(failure)
+ return failure
+
+class TestCacheManager(unittest.TestCase):
+ """Unit tests for the cache manager."""
+
+ timeout = 20
+ pending_calls = []
+ client = None
+
+ def setUp(self):
+ # CacheManager requires a db argument; a minimal test-only stub
+ # (hypothetical) satisfies the removeUntrackedFiles() call in __init__
+ class FakeDB:
+ def removeUntrackedFiles(self, dirs):
+ pass
+ self.client = CacheManager(FilePath('/tmp/.apt-p2p'), FakeDB())
+
+ def tearDown(self):
+ for p in self.pending_calls:
+ if p.active():
+ p.cancel()
+ self.client = None
+
\ No newline at end of file
--- /dev/null
+
+"""Manage all download requests to a single site."""
+
+from math import exp
+from datetime import datetime, timedelta
+
+from twisted.internet import reactor, defer, protocol
+from twisted.internet.protocol import ClientFactory
+from twisted import version as twisted_version
+from twisted.python import log
+from twisted.web2.client.interfaces import IHTTPClientManager
+from twisted.web2.client.http import ProtocolError, ClientRequest, HTTPClientProtocol
+from twisted.web2 import stream as stream_mod, http_headers
+from twisted.web2 import version as web2_version
+from twisted.trial import unittest
+from zope.interface import implements
+
+from apt_p2p_conf import version
+
+class Peer(ClientFactory):
+ """A manager for all HTTP requests to a single peer.
+
+ Controls all requests that go to a single peer (host and port).
+ This includes buffering requests until they can be sent and reconnecting
+ in the event of the connection being closed.
+
+ """
+
+ implements(IHTTPClientManager)
+
+ def __init__(self, host, port=80):
+ self.host = host
+ self.port = port
+ self.busy = False
+ self.pipeline = False
+ self.closed = True
+ self.connecting = False
+ self.request_queue = []
+ self.response_queue = []
+ self.proto = None
+ self.connector = None
+ self._errors = 0
+ self._completed = 0
+ self._downloadSpeeds = []
+ self._lastResponse = None
+ self._responseTimes = []
+
+ #{ Manage the request queue
+ def connect(self):
+ """Connect to the peer."""
+ assert self.closed and not self.connecting
+ self.connecting = True
+ d = protocol.ClientCreator(reactor, HTTPClientProtocol, self).connectTCP(self.host, self.port)
+ d.addCallback(self.connected)
+
+ def connected(self, proto):
+ """Begin processing the queued requests."""
+ self.closed = False
+ self.connecting = False
+ self.proto = proto
+ self.processQueue()
+
+ def close(self):
+ """Close the connection to the peer."""
+ if not self.closed:
+ self.proto.transport.loseConnection()
+
+ def submitRequest(self, request):
+ """Add a new request to the queue.
+
+ @type request: L{twisted.web2.client.http.ClientRequest}
+ @return: deferred that will fire with the completed request
+ """
+ request.submissionTime = datetime.now()
+ request.deferRequest = defer.Deferred()
+ self.request_queue.append(request)
+ self.processQueue()
+ return request.deferRequest
+
+ def processQueue(self):
+ """Check the queue to see if new requests can be sent to the peer."""
+ if not self.request_queue:
+ return
+ if self.connecting:
+ return
+ if self.closed:
+ self.connect()
+ return
+ if self.busy and not self.pipeline:
+ return
+ if self.response_queue and not self.pipeline:
+ return
+
+ req = self.request_queue.pop(0)
+ self.response_queue.append(req)
+ req.deferResponse = self.proto.submitRequest(req, False)
+ req.deferResponse.addCallbacks(self.requestComplete, self.requestError)
+
+ def requestComplete(self, resp):
+ """Process a completed request."""
+ self._processLastResponse()
+ req = self.response_queue.pop(0)
+ log.msg('%s of %s completed with code %d' % (req.method, req.uri, resp.code))
+ self._completed += 1
+ if resp.code >= 400:
+ self._errors += 1
+ now = datetime.now()
+ self._responseTimes.append((now, now - req.submissionTime))
+ self._lastResponse = (now, resp.stream.length)
+ req.deferRequest.callback(resp)
+
+ def requestError(self, error):
+ """Process a request that ended with an error."""
+ self._processLastResponse()
+ req = self.response_queue.pop(0)
+ log.msg('Download of %s generated error %r' % (req.uri, error))
+ self._completed += 1
+ self._errors += 1
+ req.deferRequest.errback(error)
+
+ def hashError(self, error):
+ """Log that a hash error occurred from the peer."""
+ log.msg('Hash error from peer (%s, %d): %r' % (self.host, self.port, error))
+ self._errors += 1
+
+ #{ IHTTPClientManager interface
+ def clientBusy(self, proto):
+ """Save the busy state."""
+ self.busy = True
+
+ def clientIdle(self, proto):
+ """Try to send a new request."""
+ self._processLastResponse()
+ self.busy = False
+ self.processQueue()
+
+ def clientPipelining(self, proto):
+ """Try to send a new request."""
+ self.pipeline = True
+ self.processQueue()
+
+ def clientGone(self, proto):
+ """Mark sent requests as errors."""
+ self._processLastResponse()
+ for req in self.response_queue:
+ req.deferRequest.errback(ProtocolError('lost connection'))
+ self.busy = False
+ self.pipeline = False
+ self.closed = True
+ self.connecting = False
+ self.response_queue = []
+ self.proto = None
+ if self.request_queue:
+ self.processQueue()
+
+ #{ Downloading request interface
+ def setCommonHeaders(self):
+ """Get the common HTTP headers for all requests."""
+ headers = http_headers.Headers()
+ headers.setHeader('Host', self.host)
+ headers.setHeader('User-Agent', 'apt-p2p/%s (twisted/%s twisted.web2/%s)' %
+ (version.short(), twisted_version.short(), web2_version.short()))
+ return headers
+
+ def get(self, path, method="GET", modtime=None):
+ """Add a new request to the queue.
+
+ @type path: C{string}
+ @param path: the path to request from the peer
+ @type method: C{string}
+ @param method: the HTTP method to use, 'GET' or 'HEAD'
+ (optional, defaults to 'GET')
+ @type modtime: C{int}
+ @param modtime: the modification time to use for an 'If-Modified-Since'
+ header, as seconds since the epoch
+ (optional, defaults to not sending that header)
+ """
+ headers = self.setCommonHeaders()
+ if modtime:
+ headers.setHeader('If-Modified-Since', modtime)
+ return self.submitRequest(ClientRequest(method, path, headers, None))
+
+ def getRange(self, path, rangeStart, rangeEnd, method="GET"):
+ """Add a new request with a Range header to the queue.
+
+ @type path: C{string}
+ @param path: the path to request from the peer
+ @type rangeStart: C{int}
+ @param rangeStart: the byte to begin the request at
+ @type rangeEnd: C{int}
+ @param rangeEnd: the byte to end the request at (inclusive)
+ @type method: C{string}
+ @param method: the HTTP method to use, 'GET' or 'HEAD'
+ (optional, defaults to 'GET')
+ """
+ headers = self.setCommonHeaders()
+ headers.setHeader('Range', ('bytes', [(rangeStart, rangeEnd)]))
+ return self.submitRequest(ClientRequest(method, path, headers, None))
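+
+ # For example (a sketch): getRange('/rfc/rfc0013.txt', 100, 199) sends a
+ # "Range: bytes=100-199" header, so the peer is expected to reply with a
+ # 206 Partial Content response carrying those 100 bytes (range is inclusive)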
+
+ #{ Peer information
+ def isIdle(self):
+ """Check whether the peer is idle or not."""
+ return not self.busy and not self.request_queue and not self.response_queue
+
+ def _processLastResponse(self):
+ """Save the download time of the last request for speed calculations."""
+ if self._lastResponse is not None:
+ now = datetime.now()
+ self._downloadSpeeds.append((now, now - self._lastResponse[0], self._lastResponse[1]))
+ self._lastResponse = None
+
+ def downloadSpeed(self):
+ """Gets the latest average download speed for the peer.
+
+ The average is over the last 10 responses that occurred in the last hour.
+ """
+ total_time = 0.0
+ total_download = 0
+ now = datetime.now()
+ while self._downloadSpeeds and (len(self._downloadSpeeds) > 10 or
+ now - self._downloadSpeeds[0][0] > timedelta(seconds=3600)):
+ self._downloadSpeeds.pop(0)
+
+ # If there are none, then you get 0
+ if not self._downloadSpeeds:
+ return 0.0
+
+ for download in self._downloadSpeeds:
+ total_time += download[1].days*86400.0 + download[1].seconds + download[1].microseconds/1000000.0
+ total_download += download[2]
+
+ return total_download / total_time
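+
+ # e.g. two retained responses of 100 KiB in 2.0s and 300 KiB in 6.0s give
+ # (100 + 300) KiB / (2.0 + 6.0) s = 50 KiB/s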
+
+ def responseTime(self):
+ """Gets the latest average response time for the peer.
+
+ Response time is the time from receiving the request, to the time
+ the download begins. The average is over the last 10 responses that
+ occurred in the last hour.
+ """
+ total_response = 0.0
+ now = datetime.now()
+ while self._responseTimes and (len(self._responseTimes) > 10 or
+ now - self._responseTimes[0][0] > timedelta(seconds=3600)):
+ self._responseTimes.pop(0)
+
+ # If there are none, give it the benefit of the doubt
+ if not self._responseTimes:
+ return 0.0
+
+ for response in self._responseTimes:
+ total_response += response[1].days*86400.0 + response[1].seconds + response[1].microseconds/1000000.0
+
+ return total_response / len(self._responseTimes)
+
+ def rank(self, fastest):
+ """Determine the ranking value for the peer.
+
+ The ranking value is composed of 5 numbers:
+ - 1 if a connection to the peer is open, 0.9 otherwise
+ - 1 if there are no pending requests, down to 0 at the maximum of 10
+ - 1 if the peer is the fastest of all peers, down to 0 if its speed is 0
+ - 1 if all completed requests succeeded, 0 if all produced errors
+ - an exponentially decreasing number based on the average response time
+ """
+ rank = 1.0
+ if self.closed:
+ rank *= 0.9
+ rank *= (max(0.0, 10.0 - len(self.request_queue) - len(self.response_queue))) / 10.0
+ if fastest > 0.0:
+ rank *= min(1.0, self.downloadSpeed() / fastest)
+ if self._completed:
+ rank *= max(0.0, 1.0 - float(self._errors) / self._completed)
+ rank *= exp(-self.responseTime() / 5.0)
+ return rank
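+
+ # Worked example (assumed figures): a currently-closed connection (x0.9)
+ # with 2 pending requests (x0.8), half the fastest download speed (x0.5),
+ # 1 error in 10 completed requests (x0.9) and a 1s average response time
+ # (x exp(-0.2) ~= 0.82) gives a rank of 0.9 * 0.8 * 0.5 * 0.9 * 0.82 ~= 0.27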
+
+class TestClientManager(unittest.TestCase):
+ """Unit tests for the Peer."""
+
+ client = None
+ pending_calls = []
+
+ def gotResp(self, resp, num, expect):
+ self.failUnless(resp.code >= 200 and resp.code < 300, "Got a non-200 response: %r" % resp.code)
+ if expect is not None:
+ self.failUnless(resp.stream.length == expect, "Length was incorrect, got %r, expected %r" % (resp.stream.length, expect))
+ def print_(n):
+ pass
+ def printdone(n):
+ pass
+ stream_mod.readStream(resp.stream, print_).addCallback(printdone)
+
+ def test_download(self):
+ """Tests a normal download."""
+ host = 'www.ietf.org'
+ self.client = Peer(host, 80)
+ self.timeout = 10
+
+ d = self.client.get('/rfc/rfc0013.txt')
+ d.addCallback(self.gotResp, 1, 1070)
+ return d
+
+ def test_head(self):
+ """Tests a 'HEAD' request."""
+ host = 'www.ietf.org'
+ self.client = Peer(host, 80)
+ self.timeout = 10
+
+ d = self.client.get('/rfc/rfc0013.txt', "HEAD")
+ d.addCallback(self.gotResp, 1, 0)
+ return d
+
+ def test_multiple_downloads(self):
+ """Tests multiple downloads with queueing and connection closing."""
+ host = 'www.ietf.org'
+ self.client = Peer(host, 80)
+ self.timeout = 120
+ lastDefer = defer.Deferred()
+
+ def newRequest(path, num, expect, last=False):
+ d = self.client.get(path)
+ d.addCallback(self.gotResp, num, expect)
+ if last:
+ d.addBoth(lastDefer.callback)
+
+ # 3 quick requests
+ newRequest("/rfc/rfc0006.txt", 1, 1776)
+ newRequest("/rfc/rfc2362.txt", 2, 159833)
+ newRequest("/rfc/rfc0801.txt", 3, 40824)
+
+ # This one will probably be queued
+ self.pending_calls.append(reactor.callLater(1, newRequest, '/rfc/rfc0013.txt', 4, 1070))
+
+ # Connection should still be open, but idle
+ self.pending_calls.append(reactor.callLater(10, newRequest, '/rfc/rfc0022.txt', 5, 4606))
+
+ # Connection should be closed
+ self.pending_calls.append(reactor.callLater(30, newRequest, '/rfc/rfc0048.txt', 6, 41696))
+ self.pending_calls.append(reactor.callLater(31, newRequest, '/rfc/rfc3261.txt', 7, 647976))
+ self.pending_calls.append(reactor.callLater(32, newRequest, '/rfc/rfc0014.txt', 8, 27))
+ self.pending_calls.append(reactor.callLater(32, newRequest, '/rfc/rfc0001.txt', 9, 21088))
+
+ # Now it should definitely be closed
+ self.pending_calls.append(reactor.callLater(62, newRequest, '/rfc/rfc2801.txt', 0, 598794, True))
+ return lastDefer
+
+ def test_multiple_quick_downloads(self):
+ """Tests lots of multiple downloads with queueing."""
+ host = 'www.ietf.org'
+ self.client = Peer(host, 80)
+ self.timeout = 30
+ lastDefer = defer.Deferred()
+
+ def newRequest(path, num, expect, last=False):
+ d = self.client.get(path)
+ d.addCallback(self.gotResp, num, expect)
+ if last:
+ d.addBoth(lastDefer.callback)
+
+ newRequest("/rfc/rfc0006.txt", 1, 1776)
+ newRequest("/rfc/rfc2362.txt", 2, 159833)
+ newRequest("/rfc/rfc0801.txt", 3, 40824)
+ self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0013.txt', 4, 1070))
+ self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0022.txt', 5, 4606))
+ self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0048.txt', 6, 41696))
+ self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc3261.txt', 7, 647976))
+ self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0014.txt', 8, 27))
+ self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0001.txt', 9, 21088))
+ self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc2801.txt', 0, 598794, True))
+ return lastDefer
+
+ def checkInfo(self):
+ log.msg('Rank is: %r' % self.client.rank(250.0*1024))
+ log.msg('Download speed is: %r' % self.client.downloadSpeed())
+ log.msg('Response Time is: %r' % self.client.responseTime())
+
+ def test_peer_info(self):
+ """Test retrieving the peer info during a download."""
+ host = 'www.ietf.org'
+ self.client = Peer(host, 80)
+ self.timeout = 120
+ lastDefer = defer.Deferred()
+
+ def newRequest(path, num, expect, last=False):
+ d = self.client.get(path)
+ d.addCallback(self.gotResp, num, expect)
+ if last:
+ d.addBoth(lastDefer.callback)
+
+ newRequest("/rfc/rfc0006.txt", 1, 1776)
+ newRequest("/rfc/rfc2362.txt", 2, 159833)
+ newRequest("/rfc/rfc0801.txt", 3, 40824)
+ self.pending_calls.append(reactor.callLater(1, newRequest, '/rfc/rfc0013.txt', 4, 1070))
+ self.pending_calls.append(reactor.callLater(10, newRequest, '/rfc/rfc0022.txt', 5, 4606))
+ self.pending_calls.append(reactor.callLater(30, newRequest, '/rfc/rfc0048.txt', 6, 41696))
+ self.pending_calls.append(reactor.callLater(31, newRequest, '/rfc/rfc3261.txt', 7, 647976))
+ self.pending_calls.append(reactor.callLater(32, newRequest, '/rfc/rfc0014.txt', 8, 27))
+ self.pending_calls.append(reactor.callLater(32, newRequest, '/rfc/rfc0001.txt', 9, 21088))
+ self.pending_calls.append(reactor.callLater(62, newRequest, '/rfc/rfc2801.txt', 0, 598794, True))
+
+ for i in xrange(2, 122, 2):
+ self.pending_calls.append(reactor.callLater(i, self.checkInfo))
+
+ return lastDefer
+
+ def test_range(self):
+ """Test a Range request."""
+ host = 'www.ietf.org'
+ self.client = Peer(host, 80)
+ self.timeout = 10
+
+ d = self.client.getRange('/rfc/rfc0013.txt', 100, 199)
+ d.addCallback(self.gotResp, 1, 100)
+ return d
+
+ def tearDown(self):
+ for p in self.pending_calls:
+ if p.active():
+ p.cancel()
+ self.pending_calls = []
+ if self.client:
+ self.client.close()
+ self.client = None
--- /dev/null
+
+"""Serve local requests from apt and remote requests from peers."""
+
+from urllib import unquote_plus
+from binascii import b2a_hex
+
+from twisted.python import log
+from twisted.internet import defer
+from twisted.web2 import server, http, resource, channel, stream
+from twisted.web2 import static, http_headers, responsecode
+
+from policies import ThrottlingFactory
+from apt_p2p_Khashmir.bencode import bencode
+
+class FileDownloader(static.File):
+ """Modified to make it suitable for apt requests.
+
+ Tries to satisfy requests from the cache. Found files are first checked
+ for freshness before being sent. Requests for missing or stale files are
+ forwarded to the main program for downloading.
+
+ @type manager: L{apt_p2p.AptP2P}
+ @ivar manager: the main program to query
+ """
+
+ def __init__(self, path, manager, defaultType="text/plain", ignoredExts=(), processors=None, indexNames=None):
+ self.manager = manager
+ super(FileDownloader, self).__init__(path, defaultType, ignoredExts, processors, indexNames)
+
+ def renderHTTP(self, req):
+ log.msg('Got request for %s from %s' % (req.uri, req.remoteAddr))
+ resp = super(FileDownloader, self).renderHTTP(req)
+ if isinstance(resp, defer.Deferred):
+ resp.addCallback(self._renderHTTP_done, req)
+ else:
+ resp = self._renderHTTP_done(resp, req)
+ return resp
+
+ def _renderHTTP_done(self, resp, req):
+ log.msg('Initial response to %s: %r' % (req.uri, resp))
+
+ if self.manager:
+ path = 'http:/' + req.uri
+ if resp.code >= 200 and resp.code < 400:
+ return self.manager.check_freshness(req, path, resp.headers.getHeader('Last-Modified'), resp)
+
+ log.msg('Not found, trying other methods for %s' % req.uri)
+ return self.manager.get_resp(req, path)
+
+ return resp
+
+ def createSimilarFile(self, path):
+ return self.__class__(path, self.manager, self.defaultType, self.ignoredExts,
+ self.processors, self.indexNames[:])
+
+class FileUploaderStream(stream.FileStream):
+ """Modified to make it suitable for streaming to peers.
+
+ Streams the file in small chunks to make it easier to throttle the
+ streaming to peers.
+
+ @ivar CHUNK_SIZE: the size of chunks of data to send at a time
+ """
+
+ CHUNK_SIZE = 4*1024
+
+ def read(self, sendfile=False):
+ if self.f is None:
+ return None
+
+ length = self.length
+ if length == 0:
+ self.f = None
+ return None
+
+ # Remove the SendFileBuffer and mmap use, just use string reads and writes
+
+ readSize = min(length, self.CHUNK_SIZE)
+
+ self.f.seek(self.start)
+ b = self.f.read(readSize)
+ bytesRead = len(b)
+ if not bytesRead:
+ raise RuntimeError("Ran out of data reading file %r, expected %d more bytes" % (self.f, length))
+ else:
+ self.length -= bytesRead
+ self.start += bytesRead
+ return b
+
+
+class FileUploader(static.File):
+ """Modified to make it suitable for peer requests.
+
+ Uses the modified L{FileUploaderStream} to stream the file for throttling,
+ and doesn't do any listing of directory contents.
+ """
+
+ def render(self, req):
+ if not self.fp.exists():
+ return responsecode.NOT_FOUND
+
+ if self.fp.isdir():
+ # Don't try to render a directory listing
+ return responsecode.NOT_FOUND
+
+ try:
+ f = self.fp.open()
+ except IOError, e:
+ import errno
+ if e[0] == errno.EACCES:
+ return responsecode.FORBIDDEN
+ elif e[0] == errno.ENOENT:
+ return responsecode.NOT_FOUND
+ else:
+ raise
+
+ response = http.Response()
+ # Use the modified FileStream
+ response.stream = FileUploaderStream(f, 0, self.fp.getsize())
+
+ for (header, value) in (
+ ("content-type", self.contentType()),
+ ("content-encoding", self.contentEncoding()),
+ ):
+ if value is not None:
+ response.headers.setHeader(header, value)
+
+ return response
+
+class TopLevel(resource.Resource):
+ """The HTTP server for all requests, both from peers and apt.
+
+ @type directory: L{twisted.python.filepath.FilePath}
+ @ivar directory: the directory to check for cached files
+ @type db: L{db.DB}
+ @ivar db: the database to use for looking up files and hashes
+ @type manager: L{apt_p2p.AptP2P}
+ @ivar manager: the main program object to send requests to
+ @type factory: L{twisted.web2.channel.HTTPFactory} or L{policies.ThrottlingFactory}
+ @ivar factory: the factory to use to serve HTTP requests
+
+ """
+
+ addSlash = True
+
+ def __init__(self, directory, db, manager):
+ """Initialize the instance.
+
+ @type directory: L{twisted.python.filepath.FilePath}
+ @param directory: the directory to check for cached files
+ @type db: L{db.DB}
+ @param db: the database to use for looking up files and hashes
+ @type manager: L{apt_p2p.AptP2P}
+ @param manager: the main program object to send requests to
+ """
+ self.directory = directory
+ self.db = db
+ self.manager = manager
+ self.factory = None
+
+ def getHTTPFactory(self):
+ """Initialize and get the factory for this HTTP server."""
+ if self.factory is None:
+ self.factory = channel.HTTPFactory(server.Site(self),
+ **{'maxPipeline': 10,
+ 'betweenRequestsTimeOut': 60})
+ self.factory = ThrottlingFactory(self.factory, writeLimit = 30*1024)
+ return self.factory
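+
+ # Note: the ThrottlingFactory wrapper caps the aggregate upload rate to
+ # all peers; writeLimit is presumably in bytes per second, as in
+ # twisted.protocols.policies, i.e. roughly 30 KiB/s here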
+
+ def render(self, ctx):
+ """Render a web page with descriptive statistics."""
+ return http.Response(
+ 200,
+ {'content-type': http_headers.MimeType('text', 'html')},
+ """<html><body>
+ <h2>Statistics</h2>
+ <p>TODO: eventually some stats will be shown here.</p></body></html>""")
+
+ def locateChild(self, request, segments):
+ """Process the incoming request."""
+ log.msg('Got HTTP request for %s from %s' % (request.uri, request.remoteAddr))
+ name = segments[0]
+
+ # If the request is for a shared file (from a peer)
+ if name == '~':
+ if len(segments) != 2:
+ log.msg('Got a malformed request from %s' % request.remoteAddr)
+ return None, ()
+
+ # Find the file in the database
+ hash = unquote_plus(segments[1])
+ files = self.db.lookupHash(hash)
+ if files:
+ # If it is a file, return it
+ if 'path' in files[0]:
+ log.msg('Sharing %s with %s' % (files[0]['path'].path, request.remoteAddr))
+ return FileUploader(files[0]['path'].path), ()
+ else:
+ # It's not for a file, but for a piece string, so return that
+ log.msg('Sending torrent string %s to %s' % (b2a_hex(hash), request.remoteAddr))
+ return static.Data(bencode({'t': files[0]['pieces']}), 'application/x-bencoded'), ()
+ else:
+ log.msg('Hash could not be found in database: %s' % hash)
+
+ # Only local requests (apt) get past this point
+ if request.remoteAddr.host != "127.0.0.1":
+ log.msg('Blocked illegal access to %s from %s' % (request.uri, request.remoteAddr))
+ return None, ()
+
+ if len(name) > 1:
+ # It's a request from apt
+ return FileDownloader(self.directory.path, self.manager), segments[0:]
+ else:
+ # Will render the statistics page
+ return self, ()
+
+ log.msg('Got a malformed request for "%s" from %s' % (request.uri, request.remoteAddr))
+ return None, ()
+
+if __name__ == '__builtin__':
+ # Running from twistd -ny HTTPServer.py
+ # Then test with:
+ # wget -S 'http://localhost:18080/~/whatever'
+ # wget -S 'http://localhost:18080/~/pieces'
+
+ import os.path
+ from twisted.python.filepath import FilePath
+
+ class DB:
+ def lookupHash(self, hash):
+ if hash == 'pieces':
+ return [{'pieces': 'abcdefghij0123456789\xca\xec\xb8\x0c\x00\xe7\x07\xf8~])\x8f\x9d\xe5_B\xff\x1a\xc4!'}]
+ return [{'path': FilePath(os.path.expanduser('~/school/optout'))}]
+
+ t = TopLevel(FilePath(os.path.expanduser('~')), DB(), None)
+ factory = t.getHTTPFactory()
+
+ # Standard twisted application Boilerplate
+ from twisted.application import service, strports
+ application = service.Application("demoserver")
+ s = strports.service('tcp:18080', factory)
+ s.setServiceParent(application)
--- /dev/null
+
+"""Hash and store hash information for a file.
+
+@var PIECE_SIZE: the piece size to use for hashing pieces of files
+
+"""
+
+from binascii import b2a_hex, a2b_hex
+import sys
+
+from twisted.internet import threads, defer
+from twisted.trial import unittest
+
+PIECE_SIZE = 512*1024
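+# e.g. the 1200 KiB test file below is hashed as three pieces of 512, 512
+# and 176 KiB; files of PIECE_SIZE bytes or less yield no separate pieces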
+
+class HashError(ValueError):
+ """An error has occurred while hashing a file."""
+
+class HashObject:
+ """Manages hashes and hashing for a file.
+
+ @ivar ORDER: the priority ordering of hashes, and how to extract them
+
+ """
+
+ ORDER = [ {'name': 'sha1',
+ 'length': 20,
+ 'AptPkgRecord': 'SHA1Hash',
+ 'AptSrcRecord': False,
+ 'AptIndexRecord': 'SHA1',
+ 'old_module': 'sha',
+ 'hashlib_func': 'sha1',
+ },
+ {'name': 'sha256',
+ 'length': 32,
+ 'AptPkgRecord': 'SHA256Hash',
+ 'AptSrcRecord': False,
+ 'AptIndexRecord': 'SHA256',
+ 'hashlib_func': 'sha256',
+ },
+ {'name': 'md5',
+ 'length': 16,
+ 'AptPkgRecord': 'MD5Hash',
+ 'AptSrcRecord': True,
+ 'AptIndexRecord': 'MD5SUM',
+ 'old_module': 'md5',
+ 'hashlib_func': 'md5',
+ },
+ ]
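+
+ # The order matters: the setFrom*Record() methods below use the first
+ # entry whose field is present, so SHA1 is preferred over SHA256 over MD5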
+
+ def __init__(self, digest = None, size = None, pieces = ''):
+ """Initialize the hash object."""
+ self.hashTypeNum = 0 # Use the first if nothing else matters
+ if sys.version_info < (2, 5):
+ # sha256 is not available in python before 2.5, remove it
+ for hashType in self.ORDER:
+ if hashType['name'] == 'sha256':
+ del self.ORDER[self.ORDER.index(hashType)]
+ break
+
+ self.expHash = None
+ self.expHex = None
+ self.expSize = None
+ self.expNormHash = None
+ self.fileHasher = None
+ self.pieceHasher = None
+ self.fileHash = digest
+ self.pieceHash = [pieces[x:x+self.ORDER[self.hashTypeNum]['length']]
+ for x in xrange(0, len(pieces), self.ORDER[self.hashTypeNum]['length'])]
+ self.size = size
+ self.fileHex = None
+ self.fileNormHash = None
+ self.done = True
+ self.result = None
+
+ #{ Hashing data
+ def new(self, force = False):
+ """Generate a new hashing object suitable for hashing a file.
+
+ @param force: set to True to force creating a new object even if
+ the hash has been verified already
+ """
+ if self.result is None or force:
+ self.result = None
+ self.done = False
+ self.fileHasher = self._new()
+ self.pieceHasher = None
+ self.fileHash = None
+ self.pieceHash = []
+ self.size = 0
+ self.fileHex = None
+ self.fileNormHash = None
+
+ def _new(self):
+ """Create a new hashing object according to the hash type."""
+ if sys.version_info < (2, 5):
+ mod = __import__(self.ORDER[self.hashTypeNum]['old_module'], globals(), locals(), [])
+ return mod.new()
+ else:
+ import hashlib
+ func = getattr(hashlib, self.ORDER[self.hashTypeNum]['hashlib_func'])
+ return func()
+
+ def update(self, data):
+ """Add more data to the file hasher."""
+ if self.result is None:
+ if self.done:
+ raise HashError, "Already done, you can't add more data after calling digest() or verify()"
+ if self.fileHasher is None:
+ raise HashError, "file hasher not initialized"
+
+ if not self.pieceHasher and self.size + len(data) > PIECE_SIZE:
+ # Hash up to the piece size
+ self.fileHasher.update(data[:(PIECE_SIZE - self.size)])
+ data = data[(PIECE_SIZE - self.size):]
+ self.size = PIECE_SIZE
+
+ # Save the first piece digest and initialize a new piece hasher
+ self.pieceHash.append(self.fileHasher.digest())
+ self.pieceHasher = self._new()
+
+ if self.pieceHasher:
+ # Loop in case the data contains multiple pieces
+ piece_size = self.size % PIECE_SIZE
+ while piece_size + len(data) > PIECE_SIZE:
+ # Save the piece hash and start a new one
+ self.pieceHasher.update(data[:(PIECE_SIZE - piece_size)])
+ self.pieceHash.append(self.pieceHasher.digest())
+ self.pieceHasher = self._new()
+
+ # Don't forget to hash the data normally
+ self.fileHasher.update(data[:(PIECE_SIZE - piece_size)])
+ data = data[(PIECE_SIZE - piece_size):]
+ self.size += PIECE_SIZE - piece_size
+ piece_size = self.size % PIECE_SIZE
+
+ # Hash any remaining data
+ self.pieceHasher.update(data)
+
+ self.fileHasher.update(data)
+ self.size += len(data)
+
+ def hashInThread(self, file):
+ """Hashes a file in a separate thread, returning a deferred that will callback with the result."""
+ file.restat(False)
+ if not file.exists():
+ df = defer.Deferred()
+ df.errback(HashError("file not found"))
+ return df
+
+ df = threads.deferToThread(self._hashInThread, file)
+ return df
+
+ def _hashInThread(self, file):
+ """Hashes a file, returning itself as the result."""
+ f = file.open()
+ self.new(force = True)
+ data = f.read(4096)
+ while data:
+ self.update(data)
+ data = f.read(4096)
+ self.digest()
+ return self
+
+ #{ Checking hashes of data
+ def pieceDigests(self):
+ """Get the piece hashes of the added file data."""
+ self.digest()
+ return self.pieceHash
+
+ def digest(self):
+ """Get the hash of the added file data."""
+ if self.fileHash is None:
+ if self.fileHasher is None:
+ raise HashError, "you must hash some data first"
+ self.fileHash = self.fileHasher.digest()
+ self.done = True
+
+ # Save the last piece hash
+ if self.pieceHasher:
+ self.pieceHash.append(self.pieceHasher.digest())
+ return self.fileHash
+
+ def hexdigest(self):
+ """Get the hash of the added file data in hex format."""
+ if self.fileHex is None:
+ self.fileHex = b2a_hex(self.digest())
+ return self.fileHex
+
+ def verify(self):
+ """Verify that the added file data hash matches the expected hash."""
+ if self.result is None and self.fileHash is not None and self.expHash is not None:
+ self.result = (self.fileHash == self.expHash and self.size == self.expSize)
+ return self.result
+
+ #{ Expected hash
+ def expected(self):
+ """Get the expected hash."""
+ return self.expHash
+
+ def hexexpected(self):
+ """Get the expected hash in hex format."""
+ if self.expHex is None and self.expHash is not None:
+ self.expHex = b2a_hex(self.expHash)
+ return self.expHex
+
+ #{ Setting the expected hash
+ def set(self, hashType, hashHex, size):
+ """Initialize the hash object.
+
+ @param hashType: must be one of the dictionaries from L{ORDER}
+ """
+ self.hashTypeNum = self.ORDER.index(hashType) # error if not found
+ self.expHex = hashHex
+ self.expSize = int(size)
+ self.expHash = a2b_hex(self.expHex)
+
+ def setFromIndexRecord(self, record):
+ """Set the hash from the cache of index file records.
+
+ @type record: C{dictionary}
+ @param record: keys are hash types, values are tuples of (hash, size)
+ """
+ for hashType in self.ORDER:
+ result = record.get(hashType['AptIndexRecord'], None)
+ if result:
+ self.set(hashType, result[0], result[1])
+ return True
+ return False
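+
+ # e.g. a record of {'SHA1': ('3bba0a5d97b7946ad2632002bf9caefe2cb18e00', 19)}
+ # (the hash used in the tests below) selects the 'sha1' entry of ORDER and
+ # sets that digest and size as the expected values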
+
+ def setFromPkgRecord(self, record, size):
+ """Set the hash from Apt's binary packages cache.
+
+ @param record: whatever is returned by apt_pkg.GetPkgRecords()
+ """
+ for hashType in self.ORDER:
+ hashHex = getattr(record, hashType['AptPkgRecord'], None)
+ if hashHex:
+ self.set(hashType, hashHex, size)
+ return True
+ return False
+
+ def setFromSrcRecord(self, record):
+ """Set the hash from Apt's source package records cache.
+
+ Currently very simple since Apt only tracks MD5 hashes of source files.
+
+ @type record: (C{string}, C{int}, C{string})
+ @param record: the hash, size and path of the source file
+ """
+ for hashType in self.ORDER:
+ if hashType['AptSrcRecord']:
+ self.set(hashType, record[0], record[1])
+ return True
+ return False
+
+class TestHashObject(unittest.TestCase):
+ """Unit tests for the hash objects."""
+
+ timeout = 5
+ if sys.version_info < (2, 4):
+ skip = "skippingme"
+
+ def test_failure(self):
+ """Tests that the hash object fails when treated badly."""
+ h = HashObject()
+ h.set(h.ORDER[0], b2a_hex('12345678901234567890'), '0')
+ self.failUnlessRaises(HashError, h.digest)
+ self.failUnlessRaises(HashError, h.hexdigest)
+ self.failUnlessRaises(HashError, h.update, 'gfgf')
+
+ def test_pieces(self):
+ """Tests the hashing of large files into pieces."""
+ h = HashObject()
+ h.new()
+ h.update('1234567890'*120*1024)
+ self.failUnless(h.digest() == '1(j\xd2q\x0b\n\x91\xd2\x13\x90\x15\xa3E\xcc\xb0\x8d.\xc3\xc5')
+ pieces = h.pieceDigests()
+ self.failUnless(len(pieces) == 3)
+ self.failUnless(pieces[0] == ',G \xd8\xbbPl\xf1\xa3\xa0\x0cW\n\xe6\xe6a\xc9\x95/\xe5')
+ self.failUnless(pieces[1] == '\xf6V\xeb/\xa8\xad[\x07Z\xf9\x87\xa4\xf5w\xdf\xe1|\x00\x8e\x93')
+ self.failUnless(pieces[2] == 'M[\xbf\xee\xaa+\x19\xbaV\xf699\r\x17o\xcb\x8e\xcfP\x19')
+ h.new(True)
+ for i in xrange(120*1024):
+ h.update('1234567890')
+ pieces = h.pieceDigests()
+ self.failUnless(h.digest() == '1(j\xd2q\x0b\n\x91\xd2\x13\x90\x15\xa3E\xcc\xb0\x8d.\xc3\xc5')
+ self.failUnless(len(pieces) == 3)
+ self.failUnless(pieces[0] == ',G \xd8\xbbPl\xf1\xa3\xa0\x0cW\n\xe6\xe6a\xc9\x95/\xe5')
+ self.failUnless(pieces[1] == '\xf6V\xeb/\xa8\xad[\x07Z\xf9\x87\xa4\xf5w\xdf\xe1|\x00\x8e\x93')
+ self.failUnless(pieces[2] == 'M[\xbf\xee\xaa+\x19\xbaV\xf699\r\x17o\xcb\x8e\xcfP\x19')
+
+ def test_sha1(self):
+ """Test hashing using the SHA1 hash."""
+ h = HashObject()
+ found = False
+ for hashType in h.ORDER:
+ if hashType['name'] == 'sha1':
+ found = True
+ break
+ self.failUnless(found == True)
+ h.set(hashType, '3bba0a5d97b7946ad2632002bf9caefe2cb18e00', '19')
+ h.new()
+ h.update('apt-p2p is the best')
+ self.failUnless(h.hexdigest() == '3bba0a5d97b7946ad2632002bf9caefe2cb18e00')
+ self.failUnlessRaises(HashError, h.update, 'gfgf')
+ self.failUnless(h.verify() == True)
+
+ def test_md5(self):
+ """Test hashing using the MD5 hash."""
+ h = HashObject()
+ found = False
+ for hashType in h.ORDER:
+ if hashType['name'] == 'md5':
+ found = True
+ break
+ self.failUnless(found == True)
+ h.set(hashType, '6b5abdd30d7ed80edd229f9071d8c23c', '19')
+ h.new()
+ h.update('apt-p2p is the best')
+ self.failUnless(h.hexdigest() == '6b5abdd30d7ed80edd229f9071d8c23c')
+ self.failUnlessRaises(HashError, h.update, 'gfgf')
+ self.failUnless(h.verify() == True)
+
+ def test_sha256(self):
+ """Test hashing using the SHA256 hash."""
+ h = HashObject()
+ found = False
+ for hashType in h.ORDER:
+ if hashType['name'] == 'sha256':
+ found = True
+ break
+ self.failUnless(found == True)
+ h.set(hashType, '47f2238a30a0340faa2bf01a9bdc42ba77b07b411cda1e24cd8d7b5c4b7d82a7', '19')
+ h.new()
+ h.update('apt-p2p is the best')
+ self.failUnless(h.hexdigest() == '47f2238a30a0340faa2bf01a9bdc42ba77b07b411cda1e24cd8d7b5c4b7d82a7')
+ self.failUnlessRaises(HashError, h.update, 'gfgf')
+ self.failUnless(h.verify() == True)
+
+ if sys.version_info < (2, 5):
+ test_sha256.skip = "SHA256 hashes are not supported by Python until version 2.5"
--- /dev/null
+
+"""Manage the multiple mirrors that may be requested.
+
+@var aptpkg_dir: the name of the directory to use for mirror files
+"""
+
+from urlparse import urlparse
+import os
+
+from twisted.python import log
+from twisted.python.filepath import FilePath
+from twisted.internet import defer
+from twisted.trial import unittest
+from twisted.web2.http import splitHostPort
+
+from AptPackages import AptPackages
+
+aptpkg_dir='apt-packages'
+
+class MirrorError(Exception):
+ """Exception raised when there's a problem with the mirror."""
+
+class MirrorManager:
+ """Manages all requests for mirror information.
+
+ @type cache_dir: L{twisted.python.filepath.FilePath}
+ @ivar cache_dir: the directory to use for storing all files
+ @type unload_delay: C{int}
+ @ivar unload_delay: the time to wait before unloading the apt cache
+ @type apt_caches: C{dictionary}
+ @ivar apt_caches: the available mirrors
+ """
+
+ def __init__(self, cache_dir, unload_delay):
+ self.cache_dir = cache_dir
+ self.unload_delay = unload_delay
+ self.apt_caches = {}
+
+ def extractPath(self, url):
+ """Break the full URI down into the site, base directory and path.
+
+ Site is the host and port of the mirror. Base directory is the
+ directory to the mirror location (usually just '/debian'). Path is
+ the remaining path to get to the file.
+
+ E.g. http://ftp.debian.org/debian/dists/sid/binary-i386/Packages.bz2
+ would return ('ftp.debian.org:80', '/debian',
+ '/dists/sid/binary-i386/Packages.bz2').
+
+ @param url: the URI of the file's location on the mirror
+ @rtype: (C{string}, C{string}, C{string})
+ @return: the site, base directory and path to the file
+ """
+ # Extract the host and port
+ parsed = urlparse(url)
+ host, port = splitHostPort(parsed[0], parsed[1])
+ site = host + ":" + str(port)
+ path = parsed[2]
+
+ # Try to find the base directory (most can be found this way)
+ i = max(path.rfind('/dists/'), path.rfind('/pool/'))
+ if i >= 0:
+ baseDir = path[:i]
+ path = path[i:]
+ else:
+ # Uh oh, this is not good
+ log.msg("Couldn't find a good base directory for path: %s" % (site + path))
+
+ # Try to find an existing cache that starts with this one
+ # (fallback to using an empty base directory)
+ baseDir = ''
+ if site in self.apt_caches:
+ longest_match = 0
+ for base in self.apt_caches[site]:
+ base_match = ''
+ for dirs in path.split('/'):
+ if base.startswith(base_match + '/' + dirs):
+ base_match += '/' + dirs
+ else:
+ break
+ if len(base_match) > longest_match:
+ longest_match = len(base_match)
+ baseDir = base_match
+ log.msg("Settled on baseDir: %s" % baseDir)
+
+ return site, baseDir, path
+
+ def init(self, site, baseDir):
+ """Make sure an L{AptPackages} exists for this mirror."""
+ if site not in self.apt_caches:
+ self.apt_caches[site] = {}
+
+ if baseDir not in self.apt_caches[site]:
+ site_cache = self.cache_dir.child(aptpkg_dir).child('mirrors').child(site + baseDir.replace('/', '_'))
+ site_cache.makedirs()
+ self.apt_caches[site][baseDir] = AptPackages(site_cache, self.unload_delay)
+
+ def updatedFile(self, url, file_path):
+ """A file in the mirror has changed or been added.
+
+ @see: L{AptPackages.PackageFileList.update_file}
+ """
+ site, baseDir, path = self.extractPath(url)
+ self.init(site, baseDir)
+ self.apt_caches[site][baseDir].file_updated(path, file_path)
+
+ def findHash(self, url):
+ """Find the hash for a given url.
+
+ @param url: the URI of the file's location on the mirror
+ @rtype: L{twisted.internet.defer.Deferred}
+ @return: a deferred that will fire with the returned L{Hash.HashObject}
+ """
+ site, baseDir, path = self.extractPath(url)
+ if site in self.apt_caches and baseDir in self.apt_caches[site]:
+ return self.apt_caches[site][baseDir].findHash(path)
+ d = defer.Deferred()
+ d.errback(MirrorError("Site Not Found"))
+ return d
+
+ def cleanup(self):
+ for site in self.apt_caches.keys():
+ for baseDir in self.apt_caches[site].keys():
+ self.apt_caches[site][baseDir].cleanup()
+ del self.apt_caches[site][baseDir]
+ del self.apt_caches[site]
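+
+# Typical flow (a sketch; URLs and paths are assumed): updatedFile()
+# registers each downloaded index with the right AptPackages cache, after
+# which findHash() can resolve a file's URL to its expected hash:
+#
+#   mm = MirrorManager(FilePath('/tmp/.apt-p2p'), 300)
+#   mm.updatedFile('http://ftp.debian.org/debian/dists/sid/Release',
+#                  FilePath('/var/lib/apt/lists/ftp.debian.org_..._Release'))
+#   d = mm.findHash('http://ftp.debian.org/debian/dists/sid/main/binary-i386/Packages.bz2')
+#   d.addCallback(lambda h: log.msg(h.hexexpected()))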
+
+class TestMirrorManager(unittest.TestCase):
+ """Unit tests for the mirror manager."""
+
+ timeout = 20
+ pending_calls = []
+ client = None
+
+ def setUp(self):
+ self.client = MirrorManager(FilePath('/tmp/.apt-p2p'), 300)
+
+ def test_extractPath(self):
+ """Test extracting the site and base directory from various mirrors."""
+ site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org/debian/dists/unstable/Release')
+ self.failUnless(site == "ftp.us.debian.org:80", "no match: %s" % site)
+ self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
+ self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)
+
+ site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org:16999/debian/pool/d/dpkg/dpkg_1.2.1-1.tar.gz')
+ self.failUnless(site == "ftp.us.debian.org:16999", "no match: %s" % site)
+ self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
+ self.failUnless(path == "/pool/d/dpkg/dpkg_1.2.1-1.tar.gz", "no match: %s" % path)
+
+ site, baseDir, path = self.client.extractPath('http://debian.camrdale.org/dists/unstable/Release')
+ self.failUnless(site == "debian.camrdale.org:80", "no match: %s" % site)
+ self.failUnless(baseDir == "", "no match: %s" % baseDir)
+ self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)
+
+ def verifyHash(self, found_hash, path, true_hash):
+ self.failUnless(found_hash.hexexpected() == true_hash,
+ "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))
+
+ def test_findHash(self):
+ """Tests finding the hash of an index file, binary package, source package, and another index file."""
+ # Find the largest index files that are for 'main'
+ self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n')
+ self.sourcesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').read().rstrip('\n')
+
+ # Find the Release file corresponding to the found Packages file
+ for f in os.walk('/var/lib/apt/lists').next()[2]:
+ if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
+ self.releaseFile = f
+ break
+
+ # Add all the found files to the mirror
+ self.client.updatedFile('http://' + self.releaseFile.replace('_','/'),
+ FilePath('/var/lib/apt/lists/' + self.releaseFile))
+ self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
+ self.packagesFile[self.packagesFile.find('_dists_')+1:].replace('_','/'),
+ FilePath('/var/lib/apt/lists/' + self.packagesFile))
+ self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
+ self.sourcesFile[self.sourcesFile.find('_dists_')+1:].replace('_','/'),
+ FilePath('/var/lib/apt/lists/' + self.sourcesFile))
+
+ lastDefer = defer.Deferred()
+
+ # Lookup a Packages.bz2 file
+ idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
+ '/var/lib/apt/lists/' + self.releaseFile +
+ ' | grep -E " main/binary-i386/Packages.bz2$"'
+ ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
+ idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2'
+
+ d = self.client.findHash(idx_path)
+ d.addCallback(self.verifyHash, idx_path, idx_hash)
+
+ # Lookup the binary 'dpkg' package
+ pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
+ '/var/lib/apt/lists/' + self.packagesFile +
+ ' | grep -E "^SHA1:" | head -n 1' +
+ ' | cut -d\ -f 2').read().rstrip('\n')
+ pkg_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + \
+ os.popen('grep -A 30 -E "^Package: dpkg$" ' +
+ '/var/lib/apt/lists/' + self.packagesFile +
+ ' | grep -E "^Filename:" | head -n 1' +
+ ' | cut -d\ -f 2').read().rstrip('\n')
+
+ d = self.client.findHash(pkg_path)
+ d.addCallback(self.verifyHash, pkg_path, pkg_hash)
+
+ # Lookup the source 'dpkg' package
+ src_dir = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
+ '/var/lib/apt/lists/' + self.sourcesFile +
+ ' | grep -E "^Directory:" | head -n 1' +
+ ' | cut -d\ -f 2').read().rstrip('\n')
+ src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
+ '/var/lib/apt/lists/' + self.sourcesFile +
+ ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
+ ' | cut -d\ -f 2').read().split('\n')[:-1]
+ src_paths = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
+ '/var/lib/apt/lists/' + self.sourcesFile +
+ ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
+ ' | cut -d\ -f 4').read().split('\n')[:-1]
+
+ for i in range(len(src_hashes)):
+ src_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + src_dir + '/' + src_paths[i]
+ d = self.client.findHash(src_path)
+ d.addCallback(self.verifyHash, src_path, src_hashes[i])
+
+ # Lookup a Sources.bz2 file
+ idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
+ '/var/lib/apt/lists/' + self.releaseFile +
+ ' | grep -E " main/source/Sources.bz2$"'
+ ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
+ idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/source/Sources.bz2'
+
+ d = self.client.findHash(idx_path)
+ d.addCallback(self.verifyHash, idx_path, idx_hash)
+
+ d.addBoth(lastDefer.callback)
+ return lastDefer
+
+ def tearDown(self):
+ for p in self.pending_calls:
+ if p.active():
+ p.cancel()
+ self.client.cleanup()
+ self.client = None
+
\ No newline at end of file
--- /dev/null
+
+"""Manage a set of peers and the requests to them."""
+
+from random import choice
+from urlparse import urlparse, urlunparse
+from urllib import quote_plus
+
+from twisted.internet import reactor, defer
+from twisted.python import log
+from twisted.trial import unittest
+from twisted.web2 import stream as stream_mod
+from twisted.web2.http import splitHostPort
+
+from HTTPDownloader import Peer
+from util import uncompact
+
+class PeerManager:
+ """Manage a set of peers and the requests to them.
+
+ @type clients: C{dictionary}
+ @ivar clients: the available peers that have been previously contacted
+ """
+
+ def __init__(self):
+ """Initialize the instance."""
+ self.clients = {}
+
+ def get(self, hash, mirror, peers = [], method="GET", modtime=None):
+ """Download from a list of peers or fallback to a mirror.
+
+ @type hash: L{Hash.HashObject}
+ @param hash: the hash object containing the expected hash for the file
+ @param mirror: the URI of the file on the mirror
+ @type peers: C{list} of C{string}
+ @param peers: a list of the peer info where the file can be found
+ (optional, defaults to downloading from the mirror)
+ @type method: C{string}
+ @param method: the HTTP method to use, 'GET' or 'HEAD'
+ (optional, defaults to 'GET')
+ @type modtime: C{int}
+ @param modtime: the modification time to use for an 'If-Modified-Since'
+ header, as seconds since the epoch
+ (optional, defaults to not sending that header)
+ """
+ if peers:
+ # Choose one of the peers at random
+ compact_peer = choice(peers)
+ peer = uncompact(compact_peer['c'])
+ log.msg('Downloading from peer %r' % (peer, ))
+ site = peer
+ path = '/~/' + quote_plus(hash.expected())
+ else:
+ log.msg('Downloading (%s) from mirror %s' % (method, mirror))
+ parsed = urlparse(mirror)
+ assert parsed[0] == "http", "Only HTTP is supported, not '%s'" % parsed[0]
+ site = splitHostPort(parsed[0], parsed[1])
+ path = urlunparse(('', '') + parsed[2:])
+
+ return self.getPeer(site, path, method, modtime)
+
+ def getPeer(self, site, path, method="GET", modtime=None):
+ """Create a new peer if necessary and forward the request to it.
+
+ @type site: (C{string}, C{int})
+ @param site: the IP address and port of the peer
+ @type path: C{string}
+ @param path: the path to the file on the peer
+ @type method: C{string}
+ @param method: the HTTP method to use, 'GET' or 'HEAD'
+ (optional, defaults to 'GET')
+ @type modtime: C{int}
+ @param modtime: the modification time to use for an 'If-Modified-Since'
+ header, as seconds since the epoch
+ (optional, defaults to not sending that header)
+ """
+ if site not in self.clients:
+ self.clients[site] = Peer(site[0], site[1])
+ return self.clients[site].get(path, method, modtime)
+
+ def close(self):
+ """Close all the connections to peers."""
+ for site in self.clients:
+ self.clients[site].close()
+ self.clients = {}
+
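+# A minimal usage sketch (hypothetical names; hash is a L{Hash.HashObject}
+# and peers is the list of compact peer info returned by a DHT lookup):
+#
+#     manager = PeerManager()
+#     d = manager.get(hash, 'http://ftp.us.debian.org/debian/dists/unstable/Release',
+#                     peers)
+#     d.addCallback(processResponse)
+#     ...
+#     manager.close()
+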
+class TestPeerManager(unittest.TestCase):
+ """Unit tests for the PeerManager."""
+
+ manager = None
+ pending_calls = []
+
+ def gotResp(self, resp, num, expect):
+ self.failUnless(resp.code >= 200 and resp.code < 300, "Got a non-200 response: %r" % resp.code)
+ if expect is not None:
+ self.failUnless(resp.stream.length == expect, "Length was incorrect, got %r, expected %r" % (resp.stream.length, expect))
+ def print_(n):
+ pass
+ def printdone(n):
+ pass
+ stream_mod.readStream(resp.stream, print_).addCallback(printdone)
+
+ def test_download(self):
+ """Tests a normal download."""
+ self.manager = PeerManager()
+ self.timeout = 10
+
+ host = 'www.ietf.org'
+ d = self.manager.get('', 'http://' + host + '/rfc/rfc0013.txt')
+ d.addCallback(self.gotResp, 1, 1070)
+ return d
+
+ def test_head(self):
+ """Tests a 'HEAD' request."""
+ self.manager = PeerManager()
+ self.timeout = 10
+
+ host = 'www.ietf.org'
+ d = self.manager.get('', 'http://' + host + '/rfc/rfc0013.txt', method = "HEAD")
+ d.addCallback(self.gotResp, 1, 0)
+ return d
+
+ def test_multiple_downloads(self):
+ """Tests multiple downloads with queueing and connection closing."""
+ self.manager = PeerManager()
+ self.timeout = 120
+ lastDefer = defer.Deferred()
+
+ def newRequest(host, path, num, expect, last=False):
+ d = self.manager.get('', 'http://' + host + ':' + str(80) + path)
+ d.addCallback(self.gotResp, num, expect)
+ if last:
+ d.addBoth(lastDefer.callback)
+
+ newRequest('www.ietf.org', "/rfc/rfc0006.txt", 1, 1776)
+ newRequest('www.ietf.org', "/rfc/rfc2362.txt", 2, 159833)
+ newRequest('www.google.ca', "/", 3, None)
+ self.pending_calls.append(reactor.callLater(1, newRequest, 'www.sfu.ca', '/', 4, None))
+ self.pending_calls.append(reactor.callLater(10, newRequest, 'www.ietf.org', '/rfc/rfc0048.txt', 5, 41696))
+ self.pending_calls.append(reactor.callLater(30, newRequest, 'www.ietf.org', '/rfc/rfc0022.txt', 6, 4606))
+ self.pending_calls.append(reactor.callLater(31, newRequest, 'www.sfu.ca', '/studentcentral/index.html', 7, None))
+ self.pending_calls.append(reactor.callLater(32, newRequest, 'www.ietf.org', '/rfc/rfc0014.txt', 8, 27))
+ self.pending_calls.append(reactor.callLater(32, newRequest, 'www.ietf.org', '/rfc/rfc0001.txt', 9, 21088))
+ self.pending_calls.append(reactor.callLater(62, newRequest, 'www.google.ca', '/intl/en/options/', 0, None, True))
+ return lastDefer
+
+ def tearDown(self):
+ for p in self.pending_calls:
+ if p.active():
+ p.cancel()
+ self.pending_calls = []
+ if self.manager:
+ self.manager.close()
+ self.manager = None
--- /dev/null
+
+"""The main apt-p2p modules.
+
+To run apt-p2p, you probably want to do something like::
+
+ from apt_p2p.apt_p2p import AptP2P
+ myapp = AptP2P(myDHT)
+
+where myDHT is a DHT that implements interfaces.IDHT.
+
+Diagram of the interaction between the given modules::
+
+ +---------------+ +-----------------------------------+ +-------------
+ | AptP2P | | DHT | | Internet
+ | |--->|join DHT|----|--\
+ | |--->|loadConfig | | | Another
+ | |--->|getValue | | | Node
+ | |--->|storeValue DHT|<---|--/
+ | |--->|leave | |
+ | | +-----------------------------------+ |
+ | | +-------------+ +----------------+ |
+ | | | PeerManager | | HTTPDownloader*| |
+ | |--->|get |--->|get HTTP|----|---> Mirror
+ | | | |--->|getRange | |
+ | |--->|close |--->|close HTTP|----|--\
+ | | +-------------+ +----------------+ | | Another
+ | | +-----------------------------------+ | | Peer
+ | | | HTTPServer HTTP|<---|--/
+ | |--->|getHTTPFactory | +-------------
+ |check_freshness|<---| | +-------------
+ | get_resp|<---| HTTP|<---|HTTP Request
+ | | +-----------------------------------+ |
+ | | +---------------+ +--------------+ | Local Net
+ | | | CacheManager | | ProxyFile- | | (apt)
+ | |--->|scanDirectories| | Stream* | |
+ | |--->|save_file |--->|__init__ HTTP|--->|HTTP Response
+ | |--->|save_error | | | +-------------
+ | | | | | | +-------------
+ |new_cached_file|<---| | | file|--->|write file
+ | | +---------------+ +--------------+ |
+ | | +---------------+ +--------------+ | Filesystem
+ | | | MirrorManager | | AptPackages* | |
+ | |--->|updatedFile |--->|file_updated | |
+ | |--->|findHash |--->|findHash file|<---|read file
+ +---------------+ +---------------+ +--------------+ +-------------
+
+"""
--- /dev/null
+
+"""The main program code.
+
+@var DHT_PIECES: the maximum number of pieces to store with our contact info
+ in the DHT
+@var TORRENT_PIECES: the maximum number of pieces to store as a separate entry
+ in the DHT
+@var download_dir: the name of the directory to use for downloaded files
+
+"""
+
+from binascii import b2a_hex
+from urlparse import urlunparse
+import os, re, sha
+
+from twisted.internet import defer, reactor
+from twisted.web2 import server, http, http_headers, static
+from twisted.python import log, failure
+from twisted.python.filepath import FilePath
+
+from apt_p2p_conf import config
+from PeerManager import PeerManager
+from HTTPServer import TopLevel
+from MirrorManager import MirrorManager
+from CacheManager import CacheManager
+from Hash import HashObject
+from db import DB
+from util import findMyIPAddr, compact
+
+DHT_PIECES = 4
+TORRENT_PIECES = 70
+
+download_dir = 'cache'
+
+class AptP2P:
+ """The main code object that does all of the work.
+
+ Contains all of the sub-components that do all the low-level work, and
+ coordinates communication between them.
+
+ @type cache_dir: L{twisted.python.filepath.FilePath}
+ @ivar cache_dir: the directory to use for storing all files
+ @type db: L{db.DB}
+ @ivar db: the database to use for tracking files and hashes
+ @type dht: L{interfaces.IDHT}
+ @ivar dht: the DHT instance to use
+ @type http_server: L{HTTPServer.TopLevel}
+ @ivar http_server: the web server that will handle all requests from apt
+ and from other peers
+ @type peers: L{PeerManager.PeerManager}
+ @ivar peers: the manager of all downloads from mirrors and other peers
+ @type mirrors: L{MirrorManager.MirrorManager}
+ @ivar mirrors: the manager of downloaded information about mirrors which
+ can be queried to get hashes from file names
+ @type cache: L{CacheManager.CacheManager}
+ @ivar cache: the manager of all downloaded files
+ @type my_contact: C{string}
+ @ivar my_contact: the 6-byte compact peer representation of this peer's
+ download information (IP address and port)
+ """
+
+ def __init__(self, dht):
+ """Initialize all the sub-components.
+
+ @type dht: L{interfaces.IDHT}
+ @param dht: the DHT instance to use
+ """
+ log.msg('Initializing the main apt_p2p application')
+ self.cache_dir = FilePath(config.get('DEFAULT', 'cache_dir'))
+ if not self.cache_dir.child(download_dir).exists():
+ self.cache_dir.child(download_dir).makedirs()
+ self.db = DB(self.cache_dir.child('apt-p2p.db'))
+ self.dht = dht
+ self.dht.loadConfig(config, config.get('DEFAULT', 'DHT'))
+ self.dht.join().addCallbacks(self.joinComplete, self.joinError)
+ self.http_server = TopLevel(self.cache_dir.child(download_dir), self.db, self)
+ self.getHTTPFactory = self.http_server.getHTTPFactory
+ self.peers = PeerManager()
+ self.mirrors = MirrorManager(self.cache_dir, config.gettime('DEFAULT', 'UNLOAD_PACKAGES_CACHE'))
+ other_dirs = [FilePath(f) for f in config.getstringlist('DEFAULT', 'OTHER_DIRS')]
+ self.cache = CacheManager(self.cache_dir.child(download_dir), self.db, other_dirs, self)
+ self.my_contact = None
+
+ #{ DHT maintenance
+ def joinComplete(self, result):
+ """Complete the DHT join process and determine our download information.
+
+ Called by the DHT when the join has been completed with information
+ on the external IP address and port of this peer.
+ """
+ my_addr = findMyIPAddr(result,
+ config.getint(config.get('DEFAULT', 'DHT'), 'PORT'),
+ config.getboolean('DEFAULT', 'LOCAL_OK'))
+ if not my_addr:
+ raise RuntimeError, "IP address for this machine could not be found"
+ self.my_contact = compact(my_addr, config.getint('DEFAULT', 'PORT'))
+ self.cache.scanDirectories()
+ reactor.callLater(60, self.refreshFiles)
+
+ def joinError(self, failure):
+ """Joining the DHT has failed."""
+ log.msg("joining DHT failed miserably")
+ log.err(failure)
+        raise RuntimeError, "joining the DHT failed"
+
+ def refreshFiles(self):
+ """Refresh any files in the DHT that are about to expire."""
+ expireAfter = config.gettime('DEFAULT', 'KEY_REFRESH')
+ hashes = self.db.expiredHashes(expireAfter)
+ if len(hashes.keys()) > 0:
+ log.msg('Refreshing the keys of %d DHT values' % len(hashes.keys()))
+ self._refreshFiles(None, hashes)
+
+ def _refreshFiles(self, result, hashes):
+ if result is not None:
+ log.msg('Storage resulted in: %r' % result)
+
+ if hashes:
+ raw_hash = hashes.keys()[0]
+ self.db.refreshHash(raw_hash)
+ hash = HashObject(raw_hash, pieces = hashes[raw_hash]['pieces'])
+ del hashes[raw_hash]
+ storeDefer = self.store(hash)
+ storeDefer.addBoth(self._refreshFiles, hashes)
+ else:
+ reactor.callLater(60, self.refreshFiles)
+
+ #{ Main workflow
+ def check_freshness(self, req, url, modtime, resp):
+ """Send a HEAD to the mirror to check if the response from the cache is still valid.
+
+ @type req: L{twisted.web2.http.Request}
+ @param req: the initial request sent to the HTTP server by apt
+ @param url: the URI of the actual mirror request
+ @type modtime: C{int}
+ @param modtime: the modified time of the cached file (seconds since epoch)
+ @type resp: L{twisted.web2.http.Response}
+ @param resp: the response from the cache to be sent to apt
+ @rtype: L{twisted.internet.defer.Deferred}
+ @return: a deferred that will be called back with the correct response
+ """
+ log.msg('Checking if %s is still fresh' % url)
+ d = self.peers.get('', url, method = "HEAD", modtime = modtime)
+ d.addCallback(self.check_freshness_done, req, url, resp)
+ return d
+
+ def check_freshness_done(self, resp, req, url, orig_resp):
+ """Process the returned response from the mirror.
+
+ @type resp: L{twisted.web2.http.Response}
+ @param resp: the response from the mirror to the HEAD request
+ @type req: L{twisted.web2.http.Request}
+ @param req: the initial request sent to the HTTP server by apt
+ @param url: the URI of the actual mirror request
+ @type orig_resp: L{twisted.web2.http.Response}
+ @param orig_resp: the response from the cache to be sent to apt
+ """
+ if resp.code == 304:
+ log.msg('Still fresh, returning: %s' % url)
+ return orig_resp
+ else:
+ log.msg('Stale, need to redownload: %s' % url)
+ return self.get_resp(req, url)
+
+ def get_resp(self, req, url):
+ """Lookup a hash for the file in the local mirror info.
+
+ Starts the process of getting a response to an uncached apt request.
+
+ @type req: L{twisted.web2.http.Request}
+ @param req: the initial request sent to the HTTP server by apt
+ @param url: the URI of the actual mirror request
+ @rtype: L{twisted.internet.defer.Deferred}
+ @return: a deferred that will be called back with the response
+ """
+ d = defer.Deferred()
+
+ log.msg('Trying to find hash for %s' % url)
+ findDefer = self.mirrors.findHash(url)
+
+ findDefer.addCallbacks(self.findHash_done, self.findHash_error,
+ callbackArgs=(req, url, d), errbackArgs=(req, url, d))
+ findDefer.addErrback(log.err)
+ return d
+
+ def findHash_error(self, failure, req, url, d):
+ """Process the error in hash lookup by returning an empty L{HashObject}."""
+ log.err(failure)
+ self.findHash_done(HashObject(), req, url, d)
+
+ def findHash_done(self, hash, req, url, d):
+ """Use the returned hash to lookup the file in the cache.
+
+ If the hash was not found, the workflow skips down to download from
+ the mirror (L{lookupHash_done}).
+
+ @type hash: L{Hash.HashObject}
+ @param hash: the hash object containing the expected hash for the file
+ """
+ if hash.expected() is None:
+ log.msg('Hash for %s was not found' % url)
+ self.lookupHash_done([], hash, url, d)
+ else:
+ log.msg('Found hash %s for %s' % (hash.hexexpected(), url))
+
+ # Lookup hash in cache
+ locations = self.db.lookupHash(hash.expected(), filesOnly = True)
+ self.getCachedFile(hash, req, url, d, locations)
+
+ def getCachedFile(self, hash, req, url, d, locations):
+ """Try to return the file from the cache, otherwise move on to a DHT lookup.
+
+ @type locations: C{list} of C{dictionary}
+ @param locations: the files in the cache that match the hash,
+ the dictionary contains a key 'path' whose value is a
+ L{twisted.python.filepath.FilePath} object for the file.
+ """
+ if not locations:
+ log.msg('Failed to return file from cache: %s' % url)
+ self.lookupHash(hash, url, d)
+ return
+
+ # Get the first possible location from the list
+ file = locations.pop(0)['path']
+ log.msg('Returning cached file: %s' % file.path)
+
+        # Get its response
+ resp = static.File(file.path).renderHTTP(req)
+ if isinstance(resp, defer.Deferred):
+ resp.addBoth(self._getCachedFile, hash, req, url, d, locations)
+ else:
+ self._getCachedFile(resp, hash, req, url, d, locations)
+
+ def _getCachedFile(self, resp, hash, req, url, d, locations):
+ """Check the returned response to be sure it is valid."""
+ if isinstance(resp, failure.Failure):
+ log.msg('Got error trying to get cached file')
+ log.err()
+ # Try the next possible location
+ self.getCachedFile(hash, req, url, d, locations)
+ return
+
+ log.msg('Cached response: %r' % resp)
+
+ if resp.code >= 200 and resp.code < 400:
+ d.callback(resp)
+ else:
+ # Try the next possible location
+ self.getCachedFile(hash, req, url, d, locations)
+
+ def lookupHash(self, hash, url, d):
+ """Lookup the hash in the DHT."""
+ log.msg('Looking up hash in DHT for file: %s' % url)
+ key = hash.expected()
+ lookupDefer = self.dht.getValue(key)
+ lookupDefer.addCallback(self.lookupHash_done, hash, url, d)
+
+ def lookupHash_done(self, values, hash, url, d):
+ """Start the download of the file.
+
+ The download will be from peers if the DHT lookup succeeded, or
+ from the mirror otherwise.
+
+ @type values: C{list} of C{dictionary}
+ @param values: the returned values from the DHT containing peer
+ download information
+ """
+ if not values:
+ log.msg('Peers for %s were not found' % url)
+ getDefer = self.peers.get(hash, url)
+ getDefer.addCallback(self.cache.save_file, hash, url)
+ getDefer.addErrback(self.cache.save_error, url)
+ getDefer.addCallbacks(d.callback, d.errback)
+ else:
+ log.msg('Found peers for %s: %r' % (url, values))
+ # Download from the found peers
+ getDefer = self.peers.get(hash, url, values)
+ getDefer.addCallback(self.check_response, hash, url)
+ getDefer.addCallback(self.cache.save_file, hash, url)
+ getDefer.addErrback(self.cache.save_error, url)
+ getDefer.addCallbacks(d.callback, d.errback)
+
+ def check_response(self, response, hash, url):
+ """Check the response from peers, and download from the mirror if it is not."""
+ if response.code < 200 or response.code >= 300:
+ log.msg('Download from peers failed, going to direct download: %s' % url)
+ getDefer = self.peers.get(hash, url)
+ return getDefer
+ return response
+
+ def new_cached_file(self, file_path, hash, new_hash, url = None, forceDHT = False):
+ """Add a newly cached file to the mirror info and/or the DHT.
+
+ If the file was downloaded, set url to the path it was downloaded for.
+ Doesn't add a file to the DHT unless a hash was found for it
+ (but does add it anyway if forceDHT is True).
+
+ @type file_path: L{twisted.python.filepath.FilePath}
+ @param file_path: the location of the file in the local cache
+ @type hash: L{Hash.HashObject}
+        @param hash: the original (expected) hash object, which also contains
+            the hash of the downloaded file
+        @type new_hash: C{boolean}
+        @param new_hash: whether the hash was new to this peer, and so should
+ be added to the DHT
+ @type url: C{string}
+ @param url: the URI of the location of the file in the mirror
+ (optional, defaults to not adding the file to the mirror info)
+ @type forceDHT: C{boolean}
+ @param forceDHT: whether to force addition of the file to the DHT
+ even if the hash was not found in a mirror
+ (optional, defaults to False)
+ """
+ if url:
+ self.mirrors.updatedFile(url, file_path)
+
+ if self.my_contact and hash and new_hash and (hash.expected() is not None or forceDHT):
+ return self.store(hash)
+ return None
+
+ def store(self, hash):
+ """Add a key/value pair for the file to the DHT.
+
+ Sets the key and value from the hash information, and tries to add
+ it to the DHT.
+ """
+ key = hash.digest()
+ value = {'c': self.my_contact}
+ pieces = hash.pieceDigests()
+
+ # Determine how to store any piece data
+ if len(pieces) <= 1:
+ pass
+ elif len(pieces) <= DHT_PIECES:
+ # Short enough to be stored with our peer contact info
+ value['t'] = {'t': ''.join(pieces)}
+ elif len(pieces) <= TORRENT_PIECES:
+ # Short enough to be stored in a separate key in the DHT
+            s = sha.new()
+            s.update(''.join(pieces))
+            value['h'] = s.digest()
+ else:
+ # Too long, must be served up by our peer HTTP server
+            s = sha.new()
+            s.update(''.join(pieces))
+            value['l'] = s.digest()
+
+ storeDefer = self.dht.storeValue(key, value)
+ storeDefer.addCallback(self.store_done, hash)
+ return storeDefer
+
+ def store_done(self, result, hash):
+ """Add a key/value pair for the pieces of the file to the DHT (if necessary)."""
+ log.msg('Added %s to the DHT: %r' % (hash.hexdigest(), result))
+ pieces = hash.pieceDigests()
+ if len(pieces) > DHT_PIECES and len(pieces) <= TORRENT_PIECES:
+ # Add the piece data key and value to the DHT
+            s = sha.new()
+            s.update(''.join(pieces))
+ key = s.digest()
+ value = {'t': ''.join(pieces)}
+
+ storeDefer = self.dht.storeValue(key, value)
+ storeDefer.addCallback(self.store_torrent_done, key)
+ return storeDefer
+ return result
+
+ def store_torrent_done(self, result, key):
+ """Adding the file to the DHT is complete, and so is the workflow."""
+        log.msg('Added torrent string %s to the DHT: %r' % (b2a_hex(key), result))
+ return result
+
\ No newline at end of file
--- /dev/null
+
+"""Loading of configuration files and parameters.
+
+@type version: L{twisted.python.versions.Version}
+@var version: the version of this program
+@type DEFAULT_CONFIG_FILES: C{list} of C{string}
+@var DEFAULT_CONFIG_FILES: the default config files to load (in order)
+@var DEFAULTS: the default config parameter values for the main program
+@var DHT_DEFAULTS: the default config parameter values for the default DHT
+
+"""
+
+import os, sys
+from ConfigParser import SafeConfigParser
+
+from twisted.python import log, versions
+
+class ConfigError(Exception):
+ """Errors that occur in the loading of configuration variables."""
+ def __init__(self, message):
+ self.message = message
+ def __str__(self):
+ return repr(self.message)
+
+version = versions.Version('apt-p2p', 0, 0, 0)
+
+# Set the home parameter
+home = os.path.expandvars('${HOME}')
+if home == '${HOME}' or not os.path.isdir(home):
+ home = os.path.expanduser('~')
+ if not os.path.isdir(home):
+ home = os.path.abspath(os.path.dirname(sys.argv[0]))
+
+DEFAULT_CONFIG_FILES=['/etc/apt-p2p/apt-p2p.conf',
+ home + '/.apt-p2p/apt-p2p.conf']
+
+DEFAULTS = {
+
+ # Port to listen on for all requests (TCP and UDP)
+ 'PORT': '9977',
+
+ # Directory to store the downloaded files in
+ 'CACHE_DIR': home + '/.apt-p2p/cache',
+
+ # Other directories containing packages to share with others
+ # WARNING: all files in these directories will be hashed and available
+ # for everybody to download
+ 'OTHER_DIRS': """""",
+
+ # User name to try and run as
+ 'USERNAME': '',
+
+    # Whether it's OK to use an IP address from a known local/private range
+ 'LOCAL_OK': 'no',
+
+ # Unload the packages cache after an interval of inactivity this long.
+ # The packages cache uses a lot of memory, and only takes a few seconds
+ # to reload when a new request arrives.
+ 'UNLOAD_PACKAGES_CACHE': '5m',
+
+ # Refresh the DHT keys after this much time has passed.
+ # This should be a time slightly less than the DHT's KEY_EXPIRE value.
+ 'KEY_REFRESH': '57m',
+
+ # Which DHT implementation to use.
+    # It must be possible to do "from <DHT>.DHT import DHT" to get a class that
+ # implements the IDHT interface.
+ 'DHT': 'apt_p2p_Khashmir',
+
+ # Whether to only run the DHT (for providing only a bootstrap node)
+ 'DHT-ONLY': 'no',
+}
+
+DHT_DEFAULTS = {
+ # bootstrap nodes to contact to join the DHT
+ 'BOOTSTRAP': """www.camrdale.org:9977
+ steveholt.hopto.org:9976""",
+
+ # whether this node is a bootstrap node
+ 'BOOTSTRAP_NODE': "no",
+
+ # Kademlia "K" constant, this should be an even number
+ 'K': '8',
+
+ # SHA1 is 160 bits long
+ 'HASH_LENGTH': '160',
+
+    # interval between saving the running state
+ 'CHECKPOINT_INTERVAL': '5m', # five minutes
+
+ ### SEARCHING/STORING
+    # concurrent number of calls per find node/value request
+ 'CONCURRENT_REQS': '4',
+
+ # how many hosts to post to
+ 'STORE_REDUNDANCY': '3',
+
+ # How many values to attempt to retrieve from the DHT.
+ # Setting this to 0 will try and get all values (which could take a while if
+ # a lot of nodes have values). Setting it negative will try to get that
+ # number of results from only the closest STORE_REDUNDANCY nodes to the hash.
+ # The default is a large negative number so all values from the closest
+ # STORE_REDUNDANCY nodes will be retrieved.
+ 'RETRIEVE_VALUES': '-10000',
+
+ ### ROUTING TABLE STUFF
+ # how many times in a row a node can fail to respond before it's booted from the routing table
+ 'MAX_FAILURES': '3',
+
+ # never ping a node more often than this
+ 'MIN_PING_INTERVAL': '15m', # fifteen minutes
+
+ # refresh buckets that haven't been touched in this long
+ 'BUCKET_STALENESS': '1h', # one hour
+
+ # expire entries older than this
+ 'KEY_EXPIRE': '1h', # 60 minutes
+
+ # whether to spew info about the requests/responses in the protocol
+ 'SPEW': 'yes',
+}
+
+class AptP2PConfigParser(SafeConfigParser):
+ """Adds 'gettime' and 'getstringlist' to ConfigParser objects.
+
+ @ivar time_multipliers: the 'gettime' suffixes and the multipliers needed
+ to convert them to seconds
+ """
+
+ time_multipliers={
+ 's': 1, #seconds
+ 'm': 60, #minutes
+ 'h': 3600, #hours
+ 'd': 86400,#days
+ }
+
+ def gettime(self, section, option):
+ """Read the config parameter as a time value."""
+ mult = 1
+ value = self.get(section, option)
+ if len(value) == 0:
+ raise ConfigError("Configuration parse error: [%s] %s" % (section, option))
+ suffix = value[-1].lower()
+ if suffix in self.time_multipliers.keys():
+ mult = self.time_multipliers[suffix]
+ value = value[:-1]
+ return int(value)*mult
+
+ def getstring(self, section, option):
+ """Read the config parameter as a string."""
+ return self.get(section,option)
+
+ def getstringlist(self, section, option):
+ """Read the multi-line config parameter as a list of strings."""
+ return self.get(section,option).split()
+
+ def optionxform(self, option):
+ """Use all uppercase in the config parameters names."""
+ return option.upper()
+
+# Initialize the default config parameters
+config = AptP2PConfigParser(DEFAULTS)
+config.add_section(config.get('DEFAULT', 'DHT'))
+for k in DHT_DEFAULTS:
+ config.set(config.get('DEFAULT', 'DHT'), k, DHT_DEFAULTS[k])
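+
+# Example use of the added accessors (results shown are for the defaults
+# above):
+#
+#     config.gettime('DEFAULT', 'UNLOAD_PACKAGES_CACHE')  # -> 300 (seconds)
+#     config.getstringlist('DEFAULT', 'OTHER_DIRS')       # -> []
+#     config.getboolean('DEFAULT', 'DHT-ONLY')            # -> False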
--- /dev/null
+
+"""An sqlite database for storing persistent files and hashes."""
+
+from datetime import datetime, timedelta
+from pysqlite2 import dbapi2 as sqlite
+from binascii import a2b_base64, b2a_base64
+from time import sleep
+import os, sha
+
+from twisted.python.filepath import FilePath
+from twisted.trial import unittest
+
+assert sqlite.version_info >= (2, 1)
+
+class DBExcept(Exception):
+ """An error occurred in accessing the database."""
+ pass
+
+class khash(str):
+ """Dummy class to convert all hashes to base64 for storing in the DB."""
+
+# Initialize the database to work with 'khash' objects (binary strings)
+sqlite.register_adapter(khash, b2a_base64)
+sqlite.register_converter("KHASH", a2b_base64)
+sqlite.register_converter("khash", a2b_base64)
+sqlite.enable_callback_tracebacks(True)
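+# e.g. khash('\x00\x01') is stored as its base64 form 'AAE=\n', and a column
+# declared as KHASH converts it back to '\x00\x01' when read.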
+
+class DB:
+ """An sqlite database for storing persistent files and hashes.
+
+ @type db: L{twisted.python.filepath.FilePath}
+ @ivar db: the database file to use
+ @type conn: L{pysqlite2.dbapi2.Connection}
+ @ivar conn: an open connection to the sqlite database
+ """
+
+ def __init__(self, db):
+ """Load or create the database file.
+
+ @type db: L{twisted.python.filepath.FilePath}
+ @param db: the database file to use
+ """
+ self.db = db
+ self.db.restat(False)
+ if self.db.exists():
+ self._loadDB()
+ else:
+ self._createNewDB()
+ self.conn.text_factory = str
+ self.conn.row_factory = sqlite.Row
+
+ def _loadDB(self):
+ """Open a new connection to the existing database file"""
+ try:
+ self.conn = sqlite.connect(database=self.db.path, detect_types=sqlite.PARSE_DECLTYPES)
+ except:
+ import traceback
+            raise DBExcept, "Couldn't open DB: %s" % traceback.format_exc()
+
+ def _createNewDB(self):
+ """Open a connection to a new database and create the necessary tables."""
+ if not self.db.parent().exists():
+ self.db.parent().makedirs()
+ self.conn = sqlite.connect(database=self.db.path, detect_types=sqlite.PARSE_DECLTYPES)
+ c = self.conn.cursor()
+ c.execute("CREATE TABLE files (path TEXT PRIMARY KEY UNIQUE, hashID INTEGER, " +
+ "size NUMBER, mtime NUMBER)")
+ c.execute("CREATE TABLE hashes (hashID INTEGER PRIMARY KEY AUTOINCREMENT, " +
+ "hash KHASH UNIQUE, pieces KHASH, " +
+ "piecehash KHASH, refreshed TIMESTAMP)")
+ c.execute("CREATE INDEX hashes_refreshed ON hashes(refreshed)")
+ c.execute("CREATE INDEX hashes_piecehash ON hashes(piecehash)")
+ c.close()
+ self.conn.commit()
+
+ def _removeChanged(self, file, row):
+ """If the file has changed or is missing, remove it from the DB.
+
+ @type file: L{twisted.python.filepath.FilePath}
+ @param file: the file to check
+ @type row: C{dictionary}-like object
+ @param row: contains the expected 'size' and 'mtime' of the file
+ @rtype: C{boolean}
+ @return: True if the file is unchanged, False if it is changed,
+ and None if it is missing
+ """
+ res = None
+ if row:
+ file.restat(False)
+ if file.exists():
+ # Compare the current with the expected file properties
+ res = (row['size'] == file.getsize() and row['mtime'] == file.getmtime())
+ if not res:
+ # Remove the file from the database
+ c = self.conn.cursor()
+ c.execute("DELETE FROM files WHERE path = ?", (file.path, ))
+ self.conn.commit()
+ c.close()
+ return res
+
+ def storeFile(self, file, hash, pieces = ''):
+ """Store or update a file in the database.
+
+ @type file: L{twisted.python.filepath.FilePath}
+ @param file: the file to check
+ @type hash: C{string}
+ @param hash: the hash of the file
+ @type pieces: C{string}
+ @param pieces: the concatenated list of the hashes of the pieces of
+ the file (optional, defaults to the empty string)
+ @return: True if the hash was not in the database before
+ (so it needs to be added to the DHT)
+ """
+ # Hash the pieces to get the piecehash
+ piecehash = ''
+ if pieces:
+            s = sha.new()
+            s.update(pieces)
+            piecehash = s.digest()
+
+ # Check the database for the hash
+ c = self.conn.cursor()
+ c.execute("SELECT hashID, piecehash FROM hashes WHERE hash = ?", (khash(hash), ))
+ row = c.fetchone()
+ if row:
+ assert piecehash == row['piecehash']
+ new_hash = False
+ hashID = row['hashID']
+ else:
+ # Add the new hash to the database
+ c = self.conn.cursor()
+ c.execute("INSERT OR REPLACE INTO hashes (hash, pieces, piecehash, refreshed) VALUES (?, ?, ?, ?)",
+ (khash(hash), khash(pieces), khash(piecehash), datetime.now()))
+ self.conn.commit()
+ new_hash = True
+ hashID = c.lastrowid
+
+ # Add the file to the database
+ file.restat()
+ c.execute("INSERT OR REPLACE INTO files (path, hashID, size, mtime) VALUES (?, ?, ?, ?)",
+ (file.path, hashID, file.getsize(), file.getmtime()))
+ self.conn.commit()
+ c.close()
+
+ return new_hash
+
+ def getFile(self, file):
+ """Get a file from the database.
+
+ If it has changed or is missing, it is removed from the database.
+
+ @type file: L{twisted.python.filepath.FilePath}
+ @param file: the file to check
+ @return: dictionary of info for the file, False if changed, or
+ None if not in database or missing
+ """
+ c = self.conn.cursor()
+ c.execute("SELECT hash, size, mtime, pieces FROM files JOIN hashes USING (hashID) WHERE path = ?", (file.path, ))
+ row = c.fetchone()
+ res = None
+ if row:
+ res = self._removeChanged(file, row)
+ if res:
+ res = {}
+ res['hash'] = row['hash']
+ res['size'] = row['size']
+ res['pieces'] = row['pieces']
+ c.close()
+ return res
+
+ def lookupHash(self, hash, filesOnly = False):
+ """Find a file by hash in the database.
+
+ If any found files have changed or are missing, they are removed
+ from the database. If filesOnly is False then it will also look for
+ piece string hashes if no files can be found.
+
+ @return: list of dictionaries of info for the found files
+ """
+ # Try to find the hash in the files table
+ c = self.conn.cursor()
+ c.execute("SELECT path, size, mtime, refreshed, pieces FROM files JOIN hashes USING (hashID) WHERE hash = ?", (khash(hash), ))
+ row = c.fetchone()
+ files = []
+ while row:
+ # Save the file to the list of found files
+ file = FilePath(row['path'])
+ res = self._removeChanged(file, row)
+ if res:
+ res = {}
+ res['path'] = file
+ res['size'] = row['size']
+ res['refreshed'] = row['refreshed']
+ res['pieces'] = row['pieces']
+ files.append(res)
+ row = c.fetchone()
+
+ if not filesOnly and not files:
+ # No files were found, so check the piecehashes as well
+ c.execute("SELECT refreshed, pieces, piecehash FROM hashes WHERE piecehash = ?", (khash(hash), ))
+ row = c.fetchone()
+ if row:
+ res = {}
+ res['refreshed'] = row['refreshed']
+ res['pieces'] = row['pieces']
+ files.append(res)
+
+ c.close()
+ return files
+
+ def isUnchanged(self, file):
+ """Check if a file in the file system has changed.
+
+ If it has changed, it is removed from the database.
+
+ @return: True if unchanged, False if changed, None if not in database
+ """
+ c = self.conn.cursor()
+ c.execute("SELECT size, mtime FROM files WHERE path = ?", (file.path, ))
+ row = c.fetchone()
+ return self._removeChanged(file, row)
+
+ def refreshHash(self, hash):
+ """Refresh the publishing time of a hash."""
+ c = self.conn.cursor()
+ c.execute("UPDATE hashes SET refreshed = ? WHERE hash = ?", (datetime.now(), khash(hash)))
+ c.close()
+
+ def expiredHashes(self, expireAfter):
+ """Find files that need refreshing after expireAfter seconds.
+
+ For each hash that needs refreshing, finds all the files with that hash.
+ If the file has changed or is missing, it is removed from the table.
+
+        @return: dictionary with the hashes as keys, and dictionaries of the
+            hash's info ('hashID', 'hash', 'pieces') as values
+ """
+ t = datetime.now() - timedelta(seconds=expireAfter)
+
+ # Find all the hashes that need refreshing
+ c = self.conn.cursor()
+ c.execute("SELECT hashID, hash, pieces FROM hashes WHERE refreshed < ?", (t, ))
+ row = c.fetchone()
+ expired = {}
+ while row:
+ res = expired.setdefault(row['hash'], {})
+ res['hashID'] = row['hashID']
+ res['hash'] = row['hash']
+ res['pieces'] = row['pieces']
+ row = c.fetchone()
+
+ # Make sure there are still valid files for each hash
+ for hash in expired.values():
+ valid = False
+ c.execute("SELECT path, size, mtime FROM files WHERE hashID = ?", (hash['hashID'], ))
+ row = c.fetchone()
+ while row:
+ res = self._removeChanged(FilePath(row['path']), row)
+ if res:
+ valid = True
+ row = c.fetchone()
+ if not valid:
+ # Remove hashes for which no files are still available
+ del expired[hash['hash']]
+ c.execute("DELETE FROM hashes WHERE hashID = ?", (hash['hashID'], ))
+
+ self.conn.commit()
+ c.close()
+
+ return expired
+
+ def removeUntrackedFiles(self, dirs):
+ """Remove files that are no longer tracked by the program.
+
+ @type dirs: C{list} of L{twisted.python.filepath.FilePath}
+ @param dirs: a list of the directories that we are tracking
+ @return: list of files that were removed
+ """
+ assert len(dirs) >= 1
+
+ # Create a list of globs and an SQL statement for the directories
+ newdirs = []
+ sql = "WHERE"
+ for dir in dirs:
+ newdirs.append(dir.child('*').path)
+ sql += " path NOT GLOB ? AND"
+ sql = sql[:-4]
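+        # e.g. for two directories this builds:
+        #     "WHERE path NOT GLOB ? AND path NOT GLOB ?"
+        # with newdirs supplying the corresponding '<dir>/*' patterns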
+
+ # Get a listing of all the files that will be removed
+ c = self.conn.cursor()
+ c.execute("SELECT path FROM files " + sql, newdirs)
+ row = c.fetchone()
+ removed = []
+ while row:
+ removed.append(FilePath(row['path']))
+ row = c.fetchone()
+
+ # Delete all the removed files from the database
+ if removed:
+ c.execute("DELETE FROM files " + sql, newdirs)
+ self.conn.commit()
+
+ return removed
+
+ def close(self):
+ """Close the database connection."""
+ self.conn.close()
+
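+# A minimal usage sketch (hypothetical paths; hash is a raw binary string,
+# e.g. a 20-byte SHA1 digest):
+#
+#     store = DB(FilePath('/var/cache/apt-p2p/apt-p2p.db'))
+#     new_hash = store.storeFile(FilePath('/tmp/some.deb'), hash)
+#     files = store.lookupHash(hash)
+#     store.close()
+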
+class TestDB(unittest.TestCase):
+ """Tests for the khashmir database."""
+
+ timeout = 5
+ db = FilePath('/tmp/khashmir.db')
+ hash = '\xca\xec\xb8\x0c\x00\xe7\x07\xf8~])\x8f\x9d\xe5_B\xff\x1a\xc4!'
+ directory = FilePath('/tmp/apt-p2p/')
+ file = FilePath('/tmp/apt-p2p/khashmir.test')
+ testfile = 'tmp/khashmir.test'
+ dirs = [FilePath('/tmp/apt-p2p/top1'),
+ FilePath('/tmp/apt-p2p/top2/sub1'),
+ FilePath('/tmp/apt-p2p/top2/sub2/')]
+
+ def setUp(self):
+ if not self.file.parent().exists():
+ self.file.parent().makedirs()
+ self.file.setContent('fgfhds')
+ self.file.touch()
+ self.store = DB(self.db)
+ self.store.storeFile(self.file, self.hash)
+
+ def test_openExistingDB(self):
+ """Tests opening an existing database."""
+ self.store.close()
+ self.store = None
+ sleep(1)
+ self.store = DB(self.db)
+ res = self.store.isUnchanged(self.file)
+ self.failUnless(res)
+
+ def test_getFile(self):
+ """Tests retrieving a file from the database."""
+ res = self.store.getFile(self.file)
+ self.failUnless(res)
+ self.failUnlessEqual(res['hash'], self.hash)
+
+ def test_lookupHash(self):
+ """Tests looking up a hash in the database."""
+ res = self.store.lookupHash(self.hash)
+ self.failUnless(res)
+ self.failUnlessEqual(len(res), 1)
+ self.failUnlessEqual(res[0]['path'].path, self.file.path)
+
+ def test_isUnchanged(self):
+ """Tests checking if a file in the database is unchanged."""
+ res = self.store.isUnchanged(self.file)
+ self.failUnless(res)
+ sleep(2)
+ self.file.touch()
+ res = self.store.isUnchanged(self.file)
+ self.failUnless(res == False)
+ res = self.store.isUnchanged(self.file)
+ self.failUnless(res is None)
+
+ def test_expiry(self):
+ """Tests retrieving the files from the database that have expired."""
+ res = self.store.expiredHashes(1)
+ self.failUnlessEqual(len(res.keys()), 0)
+ sleep(2)
+ res = self.store.expiredHashes(1)
+ self.failUnlessEqual(len(res.keys()), 1)
+ self.failUnlessEqual(res.keys()[0], self.hash)
+ self.store.refreshHash(self.hash)
+ res = self.store.expiredHashes(1)
+ self.failUnlessEqual(len(res.keys()), 0)
+
+ def build_dirs(self):
+ for dir in self.dirs:
+ file = dir.preauthChild(self.testfile)
+ if not file.parent().exists():
+ file.parent().makedirs()
+ file.setContent(file.path)
+ file.touch()
+ self.store.storeFile(file, self.hash)
+
+ def test_multipleHashes(self):
+ """Tests looking up a hash with multiple files in the database."""
+ self.build_dirs()
+ res = self.store.expiredHashes(1)
+ self.failUnlessEqual(len(res.keys()), 0)
+ res = self.store.lookupHash(self.hash)
+ self.failUnless(res)
+ self.failUnlessEqual(len(res), 4)
+ self.failUnlessEqual(res[0]['refreshed'], res[1]['refreshed'])
+ self.failUnlessEqual(res[0]['refreshed'], res[2]['refreshed'])
+ self.failUnlessEqual(res[0]['refreshed'], res[3]['refreshed'])
+ sleep(2)
+ res = self.store.expiredHashes(1)
+ self.failUnlessEqual(len(res.keys()), 1)
+ self.failUnlessEqual(res.keys()[0], self.hash)
+ self.store.refreshHash(self.hash)
+ res = self.store.expiredHashes(1)
+ self.failUnlessEqual(len(res.keys()), 0)
+
+ def test_removeUntracked(self):
+ """Tests removing untracked files from the database."""
+ self.build_dirs()
+ res = self.store.removeUntrackedFiles(self.dirs)
+ self.failUnlessEqual(len(res), 1, 'Got removed paths: %r' % res)
+ self.failUnlessEqual(res[0], self.file, 'Got removed paths: %r' % res)
+ res = self.store.removeUntrackedFiles(self.dirs)
+ self.failUnlessEqual(len(res), 0, 'Got removed paths: %r' % res)
+ res = self.store.removeUntrackedFiles(self.dirs[1:])
+ self.failUnlessEqual(len(res), 1, 'Got removed paths: %r' % res)
+ self.failUnlessEqual(res[0], self.dirs[0].preauthChild(self.testfile), 'Got removed paths: %r' % res)
+ res = self.store.removeUntrackedFiles(self.dirs[:1])
+ self.failUnlessEqual(len(res), 2, 'Got removed paths: %r' % res)
+ self.failUnlessIn(self.dirs[1].preauthChild(self.testfile), res, 'Got removed paths: %r' % res)
+ self.failUnlessIn(self.dirs[2].preauthChild(self.testfile), res, 'Got removed paths: %r' % res)
+
+ def tearDown(self):
+ self.directory.remove()
+ self.store.close()
+ self.db.remove()
+
--- /dev/null
+
+"""Some interfaces that are used by the apt-p2p classes."""
+
+from zope.interface import Interface
+
+class IDHT(Interface):
+ """An abstract interface for using a DHT implementation."""
+
+ def loadConfig(self, config, section):
+ """Load the DHTs configuration from a dictionary.
+
+ @type config: C{SafeConfigParser}
+ @param config: the dictionary of config values
+ """
+
+ def join(self):
+ """Bootstrap the new DHT node into the DHT.
+
+ @rtype: C{Deferred}
+ @return: a deferred that will fire when the node has joined
+ """
+
+ def leave(self):
+ """Depart gracefully from the DHT.
+
+ @rtype: C{Deferred}
+ @return: a deferred that will fire when the node has left
+ """
+
+ def getValue(self, key):
+ """Get a value from the DHT for the specified key.
+
+ The length of the key may be adjusted for use with the DHT.
+
+ @rtype: C{Deferred}
+ @return: a deferred that will fire with the stored values
+ """
+
+ def storeValue(self, key, value):
+ """Store a value in the DHT for the specified key.
+
+ The length of the key may be adjusted for use with the DHT.
+ """
--- /dev/null
+# -*- test-case-name: twisted.test.test_policies -*-
+# Copyright (c) 2001-2007 Twisted Matrix Laboratories.
+# See LICENSE for details.
+
+
+"""
+Resource limiting policies.
+
+@seealso: See also L{twisted.protocols.htb} for rate limiting.
+"""
+
+# system imports
+import sys, operator
+
+# twisted imports
+from twisted.internet.protocol import ServerFactory, Protocol, ClientFactory
+from twisted.internet import reactor, error
+from twisted.python import log
+from zope.interface import providedBy, directlyProvides
+
+
+class ProtocolWrapper(Protocol):
+ """Wraps protocol instances and acts as their transport as well."""
+
+ disconnecting = 0
+
+ def __init__(self, factory, wrappedProtocol):
+ self.wrappedProtocol = wrappedProtocol
+ self.factory = factory
+
+ def makeConnection(self, transport):
+ directlyProvides(self, *providedBy(self) + providedBy(transport))
+ Protocol.makeConnection(self, transport)
+
+ # Transport relaying
+
+ def write(self, data):
+ self.transport.write(data)
+
+ def writeSequence(self, data):
+ self.transport.writeSequence(data)
+
+ def loseConnection(self):
+ self.disconnecting = 1
+ self.transport.loseConnection()
+
+ def getPeer(self):
+ return self.transport.getPeer()
+
+ def getHost(self):
+ return self.transport.getHost()
+
+ def registerProducer(self, producer, streaming):
+ self.transport.registerProducer(producer, streaming)
+
+ def unregisterProducer(self):
+ self.transport.unregisterProducer()
+
+ def stopConsuming(self):
+ self.transport.stopConsuming()
+
+ def __getattr__(self, name):
+ return getattr(self.transport, name)
+
+ # Protocol relaying
+
+ def connectionMade(self):
+ self.factory.registerProtocol(self)
+ self.wrappedProtocol.makeConnection(self)
+
+ def dataReceived(self, data):
+ self.wrappedProtocol.dataReceived(data)
+
+ def connectionLost(self, reason):
+ self.factory.unregisterProtocol(self)
+ self.wrappedProtocol.connectionLost(reason)
+
+
+class WrappingFactory(ClientFactory):
+ """Wraps a factory and its protocols, and keeps track of them."""
+
+ protocol = ProtocolWrapper
+
+ def __init__(self, wrappedFactory):
+ self.wrappedFactory = wrappedFactory
+ self.protocols = {}
+
+ def doStart(self):
+ self.wrappedFactory.doStart()
+ ClientFactory.doStart(self)
+
+ def doStop(self):
+ self.wrappedFactory.doStop()
+ ClientFactory.doStop(self)
+
+ def startedConnecting(self, connector):
+ self.wrappedFactory.startedConnecting(connector)
+
+ def clientConnectionFailed(self, connector, reason):
+ self.wrappedFactory.clientConnectionFailed(connector, reason)
+
+ def clientConnectionLost(self, connector, reason):
+ self.wrappedFactory.clientConnectionLost(connector, reason)
+
+ def buildProtocol(self, addr):
+ return self.protocol(self, self.wrappedFactory.buildProtocol(addr))
+
+ def registerProtocol(self, p):
+ """Called by protocol to register itself."""
+ self.protocols[p] = 1
+
+ def unregisterProtocol(self, p):
+ """Called by protocols when they go away."""
+ del self.protocols[p]
+
+
+class ThrottlingProtocol(ProtocolWrapper):
+ """Protocol for ThrottlingFactory."""
+
+ # wrap API for tracking bandwidth
+
+ def __init__(self, factory, wrappedProtocol):
+ ProtocolWrapper.__init__(self, factory, wrappedProtocol)
+ self._tempDataBuffer = []
+ self._tempDataLength = 0
+ self.throttled = False
+
+ def write(self, data):
+ # Check if we can write
+ if not self.throttled:
+ paused = self.factory.registerWritten(len(data))
+ if not paused:
+ ProtocolWrapper.write(self, data)
+
+ if paused is not None and hasattr(self, "producer") and self.producer and not self.producer.paused:
+                # Interrupt the flow so that others can have a chance.
+                # We can only do this if it's not already paused, otherwise we
+                # risk unpausing something that the Server paused.
+ self.producer.pauseProducing()
+ reactor.callLater(0, self.producer.resumeProducing)
+
+ if self.throttled or paused:
+ # Can't write, buffer the data
+ self._tempDataBuffer.append(data)
+ self._tempDataLength += len(data)
+ self._throttleWrites()
+
+ def writeSequence(self, seq):
+ if not self.throttled:
+ # Write each sequence separately
+ while seq and not self.factory.registerWritten(len(seq[0])):
+ ProtocolWrapper.write(self, seq.pop(0))
+
+ # If there's some left, we must have been paused
+ if seq:
+ self._tempDataBuffer.extend(seq)
+ self._tempDataLength += reduce(operator.add, map(len, seq))
+ self._throttleWrites()
+
+ def dataReceived(self, data):
+ self.factory.registerRead(len(data))
+ ProtocolWrapper.dataReceived(self, data)
+
+ def registerProducer(self, producer, streaming):
+ assert streaming, "You can only use the ThrottlingProtocol with streaming (push) producers."
+ self.producer = producer
+ ProtocolWrapper.registerProducer(self, producer, streaming)
+
+ def unregisterProducer(self):
+ del self.producer
+ ProtocolWrapper.unregisterProducer(self)
+
+
+ def throttleReads(self):
+ self.transport.pauseProducing()
+
+ def unthrottleReads(self):
+ self.transport.resumeProducing()
+
+ def _throttleWrites(self):
+ # If we haven't yet, queue for unthrottling
+ if not self.throttled:
+ self.throttled = True
+ self.factory.throttledWrites(self)
+
+ if hasattr(self, "producer") and self.producer:
+ self.producer.pauseProducing()
+
+ def unthrottleWrites(self):
+ # Write some data
+ if self._tempDataBuffer:
+ assert not self.factory.registerWritten(len(self._tempDataBuffer[0]))
+ self._tempDataLength -= len(self._tempDataBuffer[0])
+ ProtocolWrapper.write(self, self._tempDataBuffer.pop(0))
+ assert self._tempDataLength >= 0
+
+ # If we wrote it all, start producing more
+ if not self._tempDataBuffer:
+ assert self._tempDataLength == 0
+ self.throttled = False
+ if hasattr(self, "producer") and self.producer:
+ # This might unpause something the Server has also paused, but
+ # it will get paused again on first write anyway
+ reactor.callLater(0, self.producer.resumeProducing)
+
+ return self._tempDataLength
+
+
+class ThrottlingFactory(WrappingFactory):
+ """
+ Throttles bandwidth and number of connections.
+
+ Write bandwidth will only be throttled if there is a producer
+ registered.
+ """
+
+ protocol = ThrottlingProtocol
+ CHUNK_SIZE = 4*1024
+
+ def __init__(self, wrappedFactory, maxConnectionCount=sys.maxint,
+ readLimit=None, writeLimit=None):
+ WrappingFactory.__init__(self, wrappedFactory)
+ self.connectionCount = 0
+ self.maxConnectionCount = maxConnectionCount
+ self.readLimit = readLimit # max bytes we should read per second
+ self.writeLimit = writeLimit # max bytes we should write per second
+ self.readThisSecond = 0
+ self.writeAvailable = writeLimit
+ self._writeQueue = []
+ self.unthrottleReadsID = None
+ self.checkReadBandwidthID = None
+ self.unthrottleWritesID = None
+ self.checkWriteBandwidthID = None
+
+
+ def callLater(self, period, func):
+ """
+        Wrapper around L{reactor.callLater} for test purposes.
+ """
+ return reactor.callLater(period, func)
+
+
+ def registerWritten(self, length):
+ """
+        Called by the protocol to tell us more bytes were written.
+
+        Returns None if there is no write limit, False if the bytes were
+        written, and True if they could not be written and the protocol
+        should buffer the data and pause itself.
+ """
+ # Check if there are bytes available to write
+ if self.writeLimit is None:
+ return None
+ elif self.writeAvailable > 0:
+ self.writeAvailable -= length
+ return False
+
+ return True
+
+
+ def throttledWrites(self, p):
+ """
+ Called by the protocol to queue it for later writing.
+ """
+ assert p not in self._writeQueue
+ self._writeQueue.append(p)
+
+
+ def registerRead(self, length):
+ """
+ Called by protocol to tell us more bytes were read.
+ """
+ self.readThisSecond += length
+
+
+ def checkReadBandwidth(self):
+ """
+ Checks if we've passed bandwidth limits.
+ """
+ if self.readThisSecond > self.readLimit:
+ self.throttleReads()
+ throttleTime = (float(self.readThisSecond) / self.readLimit) - 1.0
+ self.unthrottleReadsID = self.callLater(throttleTime,
+ self.unthrottleReads)
+ self.readThisSecond = 0
+ self.checkReadBandwidthID = self.callLater(1, self.checkReadBandwidth)
+
+
+ def checkWriteBandwidth(self):
+ """
+ Add some new available bandwidth, and check for protocols to unthrottle.
+ """
+ # Increase the available write bytes, but not higher than the limit
+ self.writeAvailable = min(self.writeLimit, self.writeAvailable + self.writeLimit)
+
+ # Write from the queue until it's empty or we're throttled again
+ while self.writeAvailable > 0 and self._writeQueue:
+ # Get the first queued protocol
+ p = self._writeQueue.pop(0)
+ _tempWriteAvailable = self.writeAvailable
+ bytesLeft = 1
+
+ # Unthrottle writes until CHUNK_SIZE is reached or the protocol is unbuffered
+ while self.writeAvailable > 0 and _tempWriteAvailable - self.writeAvailable < self.CHUNK_SIZE and bytesLeft > 0:
+ # Unthrottle a single write (from the protocol's buffer)
+ bytesLeft = p.unthrottleWrites()
+
+ # If the protocol is not done, requeue it
+ if bytesLeft > 0:
+ self._writeQueue.append(p)
+
+ self.checkWriteBandwidthID = self.callLater(1, self.checkWriteBandwidth)
+
+
+ def throttleReads(self):
+ """
+ Throttle reads on all protocols.
+ """
+ log.msg("Throttling reads on %s" % self)
+ for p in self.protocols.keys():
+ p.throttleReads()
+
+
+ def unthrottleReads(self):
+ """
+ Stop throttling reads on all protocols.
+ """
+ self.unthrottleReadsID = None
+ log.msg("Stopped throttling reads on %s" % self)
+ for p in self.protocols.keys():
+ p.unthrottleReads()
+
+
+ def buildProtocol(self, addr):
+ if self.connectionCount == 0:
+ if self.readLimit is not None:
+ self.checkReadBandwidth()
+ if self.writeLimit is not None:
+ self.checkWriteBandwidth()
+
+ if self.connectionCount < self.maxConnectionCount:
+ self.connectionCount += 1
+ return WrappingFactory.buildProtocol(self, addr)
+ else:
+ log.msg("Max connection count reached!")
+ return None
+
+
+ def unregisterProtocol(self, p):
+ WrappingFactory.unregisterProtocol(self, p)
+ self.connectionCount -= 1
+ if self.connectionCount == 0:
+ if self.unthrottleReadsID is not None:
+ self.unthrottleReadsID.cancel()
+ if self.checkReadBandwidthID is not None:
+ self.checkReadBandwidthID.cancel()
+ if self.unthrottleWritesID is not None:
+ self.unthrottleWritesID.cancel()
+ if self.checkWriteBandwidthID is not None:
+ self.checkWriteBandwidthID.cancel()
+
+
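+# Example: wrap an existing server factory to cap it at 100 connections and
+# roughly 56 kB/s in each direction (hypothetical numbers and factory):
+#
+#     factory = ThrottlingFactory(httpFactory, maxConnectionCount=100,
+#                                 readLimit=57344, writeLimit=57344)
+#     reactor.listenTCP(8080, factory)
+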
+
+class SpewingProtocol(ProtocolWrapper):
+ def dataReceived(self, data):
+ log.msg("Received: %r" % data)
+ ProtocolWrapper.dataReceived(self,data)
+
+ def write(self, data):
+ log.msg("Sending: %r" % data)
+ ProtocolWrapper.write(self,data)
+
+
+
+class SpewingFactory(WrappingFactory):
+ protocol = SpewingProtocol
+
+
+
+class LimitConnectionsByPeer(WrappingFactory):
+ """Stability: Unstable"""
+
+ maxConnectionsPerPeer = 5
+
+ def startFactory(self):
+ self.peerConnections = {}
+
+ def buildProtocol(self, addr):
+ peerHost = addr[0]
+ connectionCount = self.peerConnections.get(peerHost, 0)
+ if connectionCount >= self.maxConnectionsPerPeer:
+ return None
+ self.peerConnections[peerHost] = connectionCount + 1
+ return WrappingFactory.buildProtocol(self, addr)
+
+ def unregisterProtocol(self, p):
+ peerHost = p.getPeer()[1]
+ self.peerConnections[peerHost] -= 1
+ if self.peerConnections[peerHost] == 0:
+ del self.peerConnections[peerHost]
+
+
+class LimitTotalConnectionsFactory(ServerFactory):
+ """Factory that limits the number of simultaneous connections.
+
+ API Stability: Unstable
+
+ @type connectionCount: C{int}
+ @ivar connectionCount: number of current connections.
+ @type connectionLimit: C{int} or C{None}
+ @cvar connectionLimit: maximum number of connections.
+ @type overflowProtocol: L{Protocol} or C{None}
+ @cvar overflowProtocol: Protocol to use for new connections when
+ connectionLimit is exceeded. If C{None} (the default value), excess
+ connections will be closed immediately.
+ """
+ connectionCount = 0
+ connectionLimit = None
+ overflowProtocol = None
+
+ def buildProtocol(self, addr):
+ if (self.connectionLimit is None or
+ self.connectionCount < self.connectionLimit):
+ # Build the normal protocol
+ wrappedProtocol = self.protocol()
+ elif self.overflowProtocol is None:
+ # Just drop the connection
+ return None
+ else:
+ # Too many connections, so build the overflow protocol
+ wrappedProtocol = self.overflowProtocol()
+
+ wrappedProtocol.factory = self
+ protocol = ProtocolWrapper(self, wrappedProtocol)
+ self.connectionCount += 1
+ return protocol
+
+ def registerProtocol(self, p):
+ pass
+
+ def unregisterProtocol(self, p):
+ self.connectionCount -= 1
+
+
+
+class TimeoutProtocol(ProtocolWrapper):
+ """
+ Protocol that automatically disconnects when the connection is idle.
+
+ Stability: Unstable
+ """
+
+ def __init__(self, factory, wrappedProtocol, timeoutPeriod):
+ """
+ Constructor.
+
+ @param factory: An L{IFactory}.
+        @param wrappedProtocol: A L{Protocol} to wrap.
+ @param timeoutPeriod: Number of seconds to wait for activity before
+ timing out.
+ """
+ ProtocolWrapper.__init__(self, factory, wrappedProtocol)
+ self.timeoutCall = None
+ self.setTimeout(timeoutPeriod)
+
+
+ def setTimeout(self, timeoutPeriod=None):
+ """
+ Set a timeout.
+
+ This will cancel any existing timeouts.
+
+ @param timeoutPeriod: If not C{None}, change the timeout period.
+ Otherwise, use the existing value.
+ """
+ self.cancelTimeout()
+ if timeoutPeriod is not None:
+ self.timeoutPeriod = timeoutPeriod
+ self.timeoutCall = self.factory.callLater(self.timeoutPeriod, self.timeoutFunc)
+
+
+ def cancelTimeout(self):
+ """
+ Cancel the timeout.
+
+ If the timeout was already cancelled, this does nothing.
+ """
+ if self.timeoutCall:
+ try:
+ self.timeoutCall.cancel()
+ except error.AlreadyCalled:
+ pass
+ self.timeoutCall = None
+
+
+ def resetTimeout(self):
+ """
+ Reset the timeout, usually because some activity just happened.
+ """
+ if self.timeoutCall:
+ self.timeoutCall.reset(self.timeoutPeriod)
+
+
+ def write(self, data):
+ self.resetTimeout()
+ ProtocolWrapper.write(self, data)
+
+
+ def writeSequence(self, seq):
+ self.resetTimeout()
+ ProtocolWrapper.writeSequence(self, seq)
+
+
+ def dataReceived(self, data):
+ self.resetTimeout()
+ ProtocolWrapper.dataReceived(self, data)
+
+
+ def connectionLost(self, reason):
+ self.cancelTimeout()
+ ProtocolWrapper.connectionLost(self, reason)
+
+
+ def timeoutFunc(self):
+ """
+ This method is called when the timeout is triggered.
+
+ By default it calls L{loseConnection}. Override this if you want
+ something else to happen.
+ """
+ self.loseConnection()
+
+
+
+class TimeoutFactory(WrappingFactory):
+ """
+    Factory for L{TimeoutProtocol}.
+
+ Stability: Unstable
+ """
+ protocol = TimeoutProtocol
+
+
+ def __init__(self, wrappedFactory, timeoutPeriod=30*60):
+ self.timeoutPeriod = timeoutPeriod
+ WrappingFactory.__init__(self, wrappedFactory)
+
+
+ def buildProtocol(self, addr):
+ return self.protocol(self, self.wrappedFactory.buildProtocol(addr),
+ timeoutPeriod=self.timeoutPeriod)
+
+
+ def callLater(self, period, func):
+ """
+        Wrapper around L{reactor.callLater} for test purposes.
+ """
+ return reactor.callLater(period, func)
+
+
+
+class TrafficLoggingProtocol(ProtocolWrapper):
+
+ def __init__(self, factory, wrappedProtocol, logfile, lengthLimit=None,
+ number=0):
+ """
+ @param factory: factory which created this protocol.
+ @type factory: C{protocol.Factory}.
+ @param wrappedProtocol: the underlying protocol.
+ @type wrappedProtocol: C{protocol.Protocol}.
+ @param logfile: file opened for writing used to write log messages.
+ @type logfile: C{file}
+        @param lengthLimit: maximum length of the data to log; longer data is elided.
+ @type lengthLimit: C{int}
+ @param number: identifier of the connection.
+ @type number: C{int}.
+ """
+ ProtocolWrapper.__init__(self, factory, wrappedProtocol)
+ self.logfile = logfile
+ self.lengthLimit = lengthLimit
+ self._number = number
+
+
+ def _log(self, line):
+ self.logfile.write(line + '\n')
+ self.logfile.flush()
+
+
+ def _mungeData(self, data):
+ if self.lengthLimit and len(data) > self.lengthLimit:
+ data = data[:self.lengthLimit - 12] + '<... elided>'
+ return data
+
+
+ # IProtocol
+ def connectionMade(self):
+ self._log('*')
+ return ProtocolWrapper.connectionMade(self)
+
+
+ def dataReceived(self, data):
+ self._log('C %d: %r' % (self._number, self._mungeData(data)))
+ return ProtocolWrapper.dataReceived(self, data)
+
+
+ def connectionLost(self, reason):
+ self._log('C %d: %r' % (self._number, reason))
+ return ProtocolWrapper.connectionLost(self, reason)
+
+
+ # ITransport
+ def write(self, data):
+ self._log('S %d: %r' % (self._number, self._mungeData(data)))
+ return ProtocolWrapper.write(self, data)
+
+
+ def writeSequence(self, iovec):
+ self._log('SV %d: %r' % (self._number, [self._mungeData(d) for d in iovec]))
+ return ProtocolWrapper.writeSequence(self, iovec)
+
+
+ def loseConnection(self):
+ self._log('S %d: *' % (self._number,))
+ return ProtocolWrapper.loseConnection(self)
+
+
+
+class TrafficLoggingFactory(WrappingFactory):
+ protocol = TrafficLoggingProtocol
+
+ _counter = 0
+
+ def __init__(self, wrappedFactory, logfilePrefix, lengthLimit=None):
+ self.logfilePrefix = logfilePrefix
+ self.lengthLimit = lengthLimit
+ WrappingFactory.__init__(self, wrappedFactory)
+
+
+ def open(self, name):
+ return file(name, 'w')
+
+
+ def buildProtocol(self, addr):
+ self._counter += 1
+ logfile = self.open(self.logfilePrefix + '-' + str(self._counter))
+ return self.protocol(self, self.wrappedFactory.buildProtocol(addr),
+ logfile, self.lengthLimit, self._counter)
+
+
+ def resetCounter(self):
+ """
+ Reset the value of the counter used to identify connections.
+ """
+ self._counter = 0
+
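+# Editor's sketch (an illustration, not part of the original module): log all
+# traffic on a wrapped factory to files named /tmp/traffic-1, /tmp/traffic-2,
+# and so on (the prefix and port below are made up):
+#
+#     loggedFactory = TrafficLoggingFactory(someFactory, '/tmp/traffic',
+#                                           lengthLimit=120)
+#     reactor.listenTCP(8007, loggedFactory)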
+
+
+class TimeoutMixin:
+ """Mixin for protocols which wish to timeout connections
+
+ @cvar timeOut: The number of seconds after which to timeout the connection.
+ """
+ timeOut = None
+
+ __timeoutCall = None
+
+ def callLater(self, period, func):
+ return reactor.callLater(period, func)
+
+
+ def resetTimeout(self):
+ """Reset the timeout count down"""
+ if self.__timeoutCall is not None and self.timeOut is not None:
+ self.__timeoutCall.reset(self.timeOut)
+
+ def setTimeout(self, period):
+ """Change the timeout period
+
+ @type period: C{int} or C{NoneType}
+ @param period: The period, in seconds, to change the timeout to, or
+ C{None} to disable the timeout.
+ """
+ prev = self.timeOut
+ self.timeOut = period
+
+ if self.__timeoutCall is not None:
+ if period is None:
+ self.__timeoutCall.cancel()
+ self.__timeoutCall = None
+ else:
+ self.__timeoutCall.reset(period)
+ elif period is not None:
+ self.__timeoutCall = self.callLater(period, self.__timedOut)
+
+ return prev
+
+ def __timedOut(self):
+ self.__timeoutCall = None
+ self.timeoutConnection()
+
+ def timeoutConnection(self):
+ """Called when the connection times out.
+ Override to define behavior other than dropping the connection.
+ """
+ self.transport.loseConnection()
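+
+
+if __name__ == '__main__':
+    # A minimal demo (editor's addition, not part of the original module):
+    # use TimeoutFactory to drop idle connections to a trivial echo server
+    # after 10 seconds of inactivity. The port number is arbitrary.
+    from twisted.internet import protocol
+
+    class Echo(protocol.Protocol):
+        def dataReceived(self, data):
+            self.transport.write(data)
+
+    echoFactory = protocol.ServerFactory()
+    echoFactory.protocol = Echo
+    reactor.listenTCP(8007, TimeoutFactory(echoFactory, timeoutPeriod=10))
+    reactor.run()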
--- /dev/null
+
+"""Some utitlity functions for use in the apt-p2p program.
+
+@var isLocal: a compiled regular expression suitable for testing if an
+ IP address is from a known local or private range
+"""
+
+import os, re
+
+from twisted.python import log
+from twisted.trial import unittest
+
+isLocal = re.compile('^(192\.168\.[0-9]{1,3}\.[0-9]{1,3}|'+
+                     '10\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}|'+
+                     '172\.0?(1[6-9]|2[0-9]|3[0-1])\.[0-9]{1,3}\.[0-9]{1,3}|'+
+                     '127\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})$')
+
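+# For example (editor's note): isLocal.match('10.0.1.5') and
+# isLocal.match('192.168.0.10') both succeed, while isLocal.match('8.8.8.8')
+# returns None.
+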
+def findMyIPAddr(addrs, intended_port, local_ok = False):
+ """Find the best IP address to use from a list of possibilities.
+
+ @param addrs: the list of possible IP addresses
+ @param intended_port: the port that was supposed to be used
+ @param local_ok: whether known local/private IP ranges are allowed
+ (defaults to False)
+ @return: the preferred IP address, or None if one couldn't be found
+ """
+ log.msg("got addrs: %r" % (addrs,))
+ my_addr = None
+
+ # Try to find an address using the ifconfig function
+ try:
+ ifconfig = os.popen("/sbin/ifconfig |/bin/grep inet|"+
+ "/usr/bin/awk '{print $2}' | "+
+ "sed -e s/.*://", "r").read().strip().split('\n')
+    except (IOError, OSError):
+ ifconfig = []
+
+ # Get counts for all the non-local addresses returned from ifconfig
+ addr_count = {}
+ for addr in ifconfig:
+ if local_ok or not isLocal.match(addr):
+ addr_count.setdefault(addr, 0)
+ addr_count[addr] += 1
+
+ # If only one was found, use it as a starting point
+    ifconfig_addrs = addr_count.keys()
+    if len(ifconfig_addrs) == 1:
+        my_addr = ifconfig_addrs[0]
+        log.msg('Found remote address from ifconfig: %r' % (my_addr,))
+
+ # Get counts for all the non-local addresses returned from the DHT
+ addr_count = {}
+ port_count = {}
+ for addr in addrs:
+ if local_ok or not isLocal.match(addr[0]):
+ addr_count.setdefault(addr[0], 0)
+ addr_count[addr[0]] += 1
+ port_count.setdefault(addr[1], 0)
+ port_count[addr[1]] += 1
+
+ # Find the most popular address
+ popular_addr = []
+ popular_count = 0
+ for addr in addr_count:
+ if addr_count[addr] > popular_count:
+ popular_addr = [addr]
+ popular_count = addr_count[addr]
+ elif addr_count[addr] == popular_count:
+ popular_addr.append(addr)
+
+ # Find the most popular port
+ popular_port = []
+ popular_count = 0
+ for port in port_count:
+ if port_count[port] > popular_count:
+ popular_port = [port]
+ popular_count = port_count[port]
+ elif port_count[port] == popular_count:
+ popular_port.append(port)
+
+ # Check to make sure the port isn't being changed
+ port = intended_port
+ if len(port_count.keys()) > 1:
+ log.msg('Problem, multiple ports have been found: %r' % (port_count,))
+ if port not in port_count.keys():
+ log.msg('And none of the ports found match the intended one')
+ elif len(port_count.keys()) == 1:
+ port = port_count.keys()[0]
+ else:
+ log.msg('Port was not found')
+
+ # If one is popular, use that address
+ if len(popular_addr) == 1:
+ log.msg('Found popular address: %r' % (popular_addr[0],))
+ if my_addr and my_addr != popular_addr[0]:
+ log.msg('But the popular address does not match: %s != %s' % (popular_addr[0], my_addr))
+ my_addr = popular_addr[0]
+ elif len(popular_addr) > 1:
+ log.msg('Found multiple popular addresses: %r' % (popular_addr,))
+ if my_addr and my_addr not in popular_addr:
+ log.msg('And none of the addresses found match the ifconfig one')
+ else:
+ log.msg('No non-local addresses found: %r' % (popular_addr,))
+
+ if not my_addr:
+ log.msg("Remote IP Address could not be found for this machine")
+
+ return my_addr
+
+def ipAddrFromChicken():
+ """Retrieve a possible IP address from the ipchecken website."""
+ import urllib
+ ip_search = re.compile('\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
+ try:
+ f = urllib.urlopen("http://www.ipchicken.com")
+ data = f.read()
+ f.close()
+ current_ip = ip_search.findall(data)
+ return current_ip
+ except Exception:
+ return []
+
+def uncompact(s):
+ """Extract the contact info from a compact peer representation.
+
+ @type s: C{string}
+ @param s: the compact representation
+ @rtype: (C{string}, C{int})
+ @return: the IP address and port number to contact the peer on
+    @raise ValueError: if the string is not a valid compact representation
+ """
+ if (len(s) != 6):
+ raise ValueError
+ ip = '.'.join([str(ord(i)) for i in s[0:4]])
+ port = (ord(s[4]) << 8) | ord(s[5])
+ return (ip, port)
+
+def compact(ip, port):
+ """Create a compact representation of peer contact info.
+
+ @type ip: C{string}
+ @param ip: the IP address of the peer
+ @type port: C{int}
+ @param port: the port number to contact the peer on
+ @rtype: C{string}
+ @return: the compact representation
+    @raise ValueError: if the IP address or port cannot be compacted
+ """
+
+ s = ''.join([chr(int(i)) for i in ip.split('.')]) + \
+ chr((port & 0xFF00) >> 8) + chr(port & 0xFF)
+ if len(s) != 6:
+ raise ValueError
+ return s
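+
+# A worked example (editor's note): compact('10.23.58.81', 8106) yields the
+# 6-byte string '\x0a\x17\x3a\x51\x1f\xaa' (8106 is 0x1faa), and uncompact()
+# of that string returns ('10.23.58.81', 8106).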
+
+class TestUtil(unittest.TestCase):
+ """Tests for the utilities."""
+
+ timeout = 5
+ ip = '165.234.1.34'
+ port = 61234
+
+ def test_compact(self):
+ """Make sure compacting is reversed correctly by uncompacting."""
+ d = uncompact(compact(self.ip, self.port))
+ self.failUnlessEqual(d[0], self.ip)
+ self.failUnlessEqual(d[1], self.port)
--- /dev/null
+
+"""The main interface to the Khashmir DHT.
+
+@var khashmir_dir: the name of the directory to use for DHT files
+"""
+
+from datetime import datetime
+import os, sha, random
+
+from twisted.internet import defer, reactor
+from twisted.internet.abstract import isIPAddress
+from twisted.python import log
+from twisted.trial import unittest
+from zope.interface import implements
+
+from apt_p2p.interfaces import IDHT
+from khashmir import Khashmir
+from bencode import bencode, bdecode
+
+khashmir_dir = 'apt-p2p-Khashmir'
+
+class DHTError(Exception):
+ """Represents errors that occur in the DHT."""
+
+class DHT:
+ """The main interface instance to the Khashmir DHT.
+
+ @type config: C{dictionary}
+ @ivar config: the DHT configuration values
+ @type cache_dir: C{string}
+ @ivar cache_dir: the directory to use for storing files
+ @type bootstrap: C{list} of C{string}
+ @ivar bootstrap: the nodes to contact to bootstrap into the system
+ @type bootstrap_node: C{boolean}
+ @ivar bootstrap_node: whether this node is a bootstrap node
+ @type joining: L{twisted.internet.defer.Deferred}
+    @ivar joining: if a join is underway, the deferred that will signal its end
+ @type joined: C{boolean}
+ @ivar joined: whether the DHT network has been successfully joined
+ @type outstandingJoins: C{int}
+ @ivar outstandingJoins: the number of bootstrap nodes that have yet to respond
+ @type foundAddrs: C{list} of (C{string}, C{int})
+    @ivar foundAddrs: the IP addresses and ports that were returned by bootstrap nodes
+ @type storing: C{dictionary}
+    @ivar storing: keys are keys for which store requests are active, values
+        are dictionaries mapping each value being stored to the deferred to
+        call when its store completes
+ @type retrieving: C{dictionary}
+ @ivar retrieving: keys are the keys for which getValue requests are active,
+ values are lists of the deferreds waiting for the requests
+ @type retrieved: C{dictionary}
+ @ivar retrieved: keys are the keys for which getValue requests are active,
+        values are lists of the values returned so far
+ @type config_parser: L{apt_p2p.apt_p2p_conf.AptP2PConfigParser}
+ @ivar config_parser: the configuration info for the main program
+ @type section: C{string}
+ @ivar section: the section of the configuration info that applies to the DHT
+ @type khashmir: L{khashmir.Khashmir}
+ @ivar khashmir: the khashmir DHT instance to use
+ """
+
+ implements(IDHT)
+
+ def __init__(self):
+ """Initialize the DHT."""
+ self.config = None
+ self.cache_dir = ''
+ self.bootstrap = []
+ self.bootstrap_node = False
+ self.joining = None
+ self.joined = False
+ self.outstandingJoins = 0
+ self.foundAddrs = []
+ self.storing = {}
+ self.retrieving = {}
+ self.retrieved = {}
+
+ def loadConfig(self, config, section):
+ """See L{apt_p2p.interfaces.IDHT}."""
+ self.config_parser = config
+ self.section = section
+ self.config = {}
+
+ # Get some initial values
+ self.cache_dir = os.path.join(self.config_parser.get(section, 'cache_dir'), khashmir_dir)
+ if not os.path.exists(self.cache_dir):
+ os.makedirs(self.cache_dir)
+ self.bootstrap = self.config_parser.getstringlist(section, 'BOOTSTRAP')
+ self.bootstrap_node = self.config_parser.getboolean(section, 'BOOTSTRAP_NODE')
+ for k in self.config_parser.options(section):
+ # The numbers in the config file
+ if k in ['K', 'HASH_LENGTH', 'CONCURRENT_REQS', 'STORE_REDUNDANCY',
+ 'RETRIEVE_VALUES', 'MAX_FAILURES', 'PORT']:
+ self.config[k] = self.config_parser.getint(section, k)
+ # The times in the config file
+ elif k in ['CHECKPOINT_INTERVAL', 'MIN_PING_INTERVAL',
+ 'BUCKET_STALENESS', 'KEY_EXPIRE']:
+ self.config[k] = self.config_parser.gettime(section, k)
+ # The booleans in the config file
+ elif k in ['SPEW']:
+ self.config[k] = self.config_parser.getboolean(section, k)
+ # Everything else is a string
+ else:
+ self.config[k] = self.config_parser.get(section, k)
+
+ def join(self):
+ """See L{apt_p2p.interfaces.IDHT}."""
+ if self.config is None:
+ raise DHTError, "configuration not loaded"
+ if self.joining:
+ raise DHTError, "a join is already in progress"
+
+ # Create the new khashmir instance
+ self.khashmir = Khashmir(self.config, self.cache_dir)
+
+ self.joining = defer.Deferred()
+ for node in self.bootstrap:
+ host, port = node.rsplit(':', 1)
+ port = int(port)
+
+ # Translate host names into IP addresses
+ if isIPAddress(host):
+ self._join_gotIP(host, port)
+ else:
+ reactor.resolve(host).addCallback(self._join_gotIP, port)
+
+ return self.joining
+
+ def _join_gotIP(self, ip, port):
+ """Join the DHT using a single bootstrap nodes IP address."""
+ self.outstandingJoins += 1
+ self.khashmir.addContact(ip, port, self._join_single, self._join_error)
+
+ def _join_single(self, addr):
+ """Process the response from the bootstrap node.
+
+ Finish the join by contacting close nodes.
+ """
+ self.outstandingJoins -= 1
+ if addr:
+ self.foundAddrs.append(addr)
+ if addr or self.outstandingJoins <= 0:
+ self.khashmir.findCloseNodes(self._join_complete, self._join_complete)
+ log.msg('Got back from bootstrap node: %r' % (addr,))
+
+ def _join_error(self, failure = None):
+ """Process an error in contacting the bootstrap node.
+
+ If no bootstrap nodes remain, finish the process by contacting
+ close nodes.
+ """
+ self.outstandingJoins -= 1
+ log.msg("bootstrap node could not be reached")
+ if self.outstandingJoins <= 0:
+ self.khashmir.findCloseNodes(self._join_complete, self._join_complete)
+
+ def _join_complete(self, result):
+ """End the joining process and return the addresses found for this node."""
+ if not self.joined and len(result) > 0:
+ self.joined = True
+ if self.joining and self.outstandingJoins <= 0:
+ df = self.joining
+ self.joining = None
+ if self.joined or self.bootstrap_node:
+ self.joined = True
+ df.callback(self.foundAddrs)
+ else:
+ df.errback(DHTError('could not find any nodes to bootstrap to'))
+
+ def getAddrs(self):
+ """Get the list of addresses returned by bootstrap nodes for this node."""
+ return self.foundAddrs
+
+ def leave(self):
+ """See L{apt_p2p.interfaces.IDHT}."""
+ if self.config is None:
+ raise DHTError, "configuration not loaded"
+
+ if self.joined or self.joining:
+ if self.joining:
+ self.joining.errback(DHTError('still joining when leave was called'))
+ self.joining = None
+ self.joined = False
+ self.khashmir.shutdown()
+
+ def _normKey(self, key, bits=None, bytes=None):
+ """Normalize the length of keys used in the DHT."""
+ bits = self.config["HASH_LENGTH"]
+ if bits is not None:
+ bytes = (bits - 1) // 8 + 1
+ else:
+ if bytes is None:
+ raise DHTError, "you must specify one of bits or bytes for normalization"
+
+ # Extend short keys with null bytes
+ if len(key) < bytes:
+ key = key + '\000'*(bytes - len(key))
+ # Truncate long keys
+ elif len(key) > bytes:
+ key = key[:bytes]
+ return key
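+
+    # A worked example (editor's note): with HASH_LENGTH = 160 the normalized
+    # length is (160 - 1) // 8 + 1 = 20 bytes, so a 17-byte key gets 3 null
+    # bytes appended and a 25-byte key is truncated to its first 20 bytes.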
+
+ def getValue(self, key):
+ """See L{apt_p2p.interfaces.IDHT}."""
+ if self.config is None:
+ raise DHTError, "configuration not loaded"
+ if not self.joined:
+ raise DHTError, "have not joined a network yet"
+
+ key = self._normKey(key)
+
+ d = defer.Deferred()
+ if key not in self.retrieving:
+ self.khashmir.valueForKey(key, self._getValue)
+ self.retrieving.setdefault(key, []).append(d)
+ return d
+
+ def _getValue(self, key, result):
+ """Process a returned list of values from the DHT."""
+ # Save the list of values to return when it is complete
+ if result:
+ self.retrieved.setdefault(key, []).extend([bdecode(r) for r in result])
+ else:
+ # Empty list, the get is complete, return the result
+ final_result = []
+ if key in self.retrieved:
+ final_result = self.retrieved[key]
+ del self.retrieved[key]
+ for i in range(len(self.retrieving[key])):
+ d = self.retrieving[key].pop(0)
+ d.callback(final_result)
+ del self.retrieving[key]
+
+ def storeValue(self, key, value):
+ """See L{apt_p2p.interfaces.IDHT}."""
+ if self.config is None:
+ raise DHTError, "configuration not loaded"
+ if not self.joined:
+ raise DHTError, "have not joined a network yet"
+
+ key = self._normKey(key)
+ bvalue = bencode(value)
+
+ if key in self.storing and bvalue in self.storing[key]:
+ raise DHTError, "already storing that key with the same value"
+
+ d = defer.Deferred()
+ self.khashmir.storeValueForKey(key, bvalue, self._storeValue)
+ self.storing.setdefault(key, {})[bvalue] = d
+ return d
+
+ def _storeValue(self, key, bvalue, result):
+ """Process the response from the DHT."""
+ if key in self.storing and bvalue in self.storing[key]:
+ # Check if the store succeeded
+ if len(result) > 0:
+ self.storing[key][bvalue].callback(result)
+ else:
+ self.storing[key][bvalue].errback(DHTError('could not store value %s in key %s' % (bvalue, key)))
+ del self.storing[key][bvalue]
+ if len(self.storing[key].keys()) == 0:
+ del self.storing[key]
+
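+# Editor's sketch of typical use, mirroring the package docstring (the key and
+# value are made up, and 'myDHT' is an already-joined DHT instance):
+#
+#     key = sha.new('example').digest()
+#     df = myDHT.storeValue(key, 'some value')
+#     df.addCallback(lambda result: myDHT.getValue(key))
+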
+class TestSimpleDHT(unittest.TestCase):
+ """Simple 2-node unit tests for the DHT."""
+
+ timeout = 2
+ DHT_DEFAULTS = {'PORT': 9977, 'K': 8, 'HASH_LENGTH': 160,
+ 'CHECKPOINT_INTERVAL': 300, 'CONCURRENT_REQS': 4,
+ 'STORE_REDUNDANCY': 3, 'RETRIEVE_VALUES': -10000,
+ 'MAX_FAILURES': 3,
+ 'MIN_PING_INTERVAL': 900,'BUCKET_STALENESS': 3600,
+ 'KEY_EXPIRE': 3600, 'SPEW': False, }
+
+ def setUp(self):
+ self.a = DHT()
+ self.b = DHT()
+ self.a.config = self.DHT_DEFAULTS.copy()
+ self.a.config['PORT'] = 4044
+ self.a.bootstrap = ["127.0.0.1:4044"]
+ self.a.bootstrap_node = True
+ self.a.cache_dir = '/tmp'
+ self.b.config = self.DHT_DEFAULTS.copy()
+ self.b.config['PORT'] = 4045
+ self.b.bootstrap = ["127.0.0.1:4044"]
+ self.b.cache_dir = '/tmp'
+
+ def test_bootstrap_join(self):
+ d = self.a.join()
+ return d
+
+ def node_join(self, result):
+ d = self.b.join()
+ return d
+
+ def test_join(self):
+ self.lastDefer = defer.Deferred()
+ d = self.a.join()
+ d.addCallback(self.node_join)
+ d.addCallback(self.lastDefer.callback)
+ return self.lastDefer
+
+ def test_normKey(self):
+ h = self.a._normKey('12345678901234567890')
+ self.failUnless(h == '12345678901234567890')
+ h = self.a._normKey('12345678901234567')
+ self.failUnless(h == '12345678901234567\000\000\000')
+ h = self.a._normKey('1234567890123456789012345')
+ self.failUnless(h == '12345678901234567890')
+ h = self.a._normKey('1234567890123456789')
+ self.failUnless(h == '1234567890123456789\000')
+ h = self.a._normKey('123456789012345678901')
+ self.failUnless(h == '12345678901234567890')
+
+ def value_stored(self, result, value):
+ self.stored -= 1
+ if self.stored == 0:
+ self.get_values()
+
+ def store_values(self, result):
+ self.stored = 3
+ d = self.a.storeValue(sha.new('4045').digest(), str(4045*3))
+ d.addCallback(self.value_stored, 4045)
+ d = self.a.storeValue(sha.new('4044').digest(), str(4044*2))
+ d.addCallback(self.value_stored, 4044)
+ d = self.b.storeValue(sha.new('4045').digest(), str(4045*2))
+ d.addCallback(self.value_stored, 4045)
+
+ def check_values(self, result, values):
+ self.checked -= 1
+ self.failUnless(len(result) == len(values))
+ for v in result:
+ self.failUnless(v in values)
+ if self.checked == 0:
+ self.lastDefer.callback(1)
+
+ def get_values(self):
+ self.checked = 4
+ d = self.a.getValue(sha.new('4044').digest())
+ d.addCallback(self.check_values, [str(4044*2)])
+ d = self.b.getValue(sha.new('4044').digest())
+ d.addCallback(self.check_values, [str(4044*2)])
+ d = self.a.getValue(sha.new('4045').digest())
+ d.addCallback(self.check_values, [str(4045*2), str(4045*3)])
+ d = self.b.getValue(sha.new('4045').digest())
+ d.addCallback(self.check_values, [str(4045*2), str(4045*3)])
+
+ def test_store(self):
+ from twisted.internet.base import DelayedCall
+ DelayedCall.debug = True
+ self.lastDefer = defer.Deferred()
+ d = self.a.join()
+ d.addCallback(self.node_join)
+ d.addCallback(self.store_values)
+ return self.lastDefer
+
+ def tearDown(self):
+ self.a.leave()
+ try:
+ os.unlink(self.a.khashmir.store.db)
+ except:
+ pass
+ self.b.leave()
+ try:
+ os.unlink(self.b.khashmir.store.db)
+ except:
+ pass
+
+class TestMultiDHT(unittest.TestCase):
+ """More complicated 20-node tests for the DHT."""
+
+ timeout = 60
+ num = 20
+ DHT_DEFAULTS = {'PORT': 9977, 'K': 8, 'HASH_LENGTH': 160,
+ 'CHECKPOINT_INTERVAL': 300, 'CONCURRENT_REQS': 4,
+ 'STORE_REDUNDANCY': 3, 'RETRIEVE_VALUES': -10000,
+ 'MAX_FAILURES': 3,
+ 'MIN_PING_INTERVAL': 900,'BUCKET_STALENESS': 3600,
+ 'KEY_EXPIRE': 3600, 'SPEW': False, }
+
+ def setUp(self):
+ self.l = []
+ self.startport = 4081
+ for i in range(self.num):
+ self.l.append(DHT())
+ self.l[i].config = self.DHT_DEFAULTS.copy()
+ self.l[i].config['PORT'] = self.startport + i
+ self.l[i].bootstrap = ["127.0.0.1:" + str(self.startport)]
+ self.l[i].cache_dir = '/tmp'
+ self.l[0].bootstrap_node = True
+
+ def node_join(self, result, next_node):
+ d = self.l[next_node].join()
+ if next_node + 1 < len(self.l):
+ d.addCallback(self.node_join, next_node + 1)
+ else:
+ d.addCallback(self.lastDefer.callback)
+
+ def test_join(self):
+ self.timeout = 2
+ self.lastDefer = defer.Deferred()
+ d = self.l[0].join()
+ d.addCallback(self.node_join, 1)
+ return self.lastDefer
+
+ def store_values(self, result, i = 0, j = 0):
+ if j > i:
+ j -= i+1
+ i += 1
+ if i == len(self.l):
+ self.get_values()
+ else:
+ d = self.l[j].storeValue(sha.new(str(self.startport+i)).digest(), str((self.startport+i)*(j+1)))
+ d.addCallback(self.store_values, i, j+1)
+
+ def get_values(self, result = None, check = None, i = 0, j = 0):
+ if result is not None:
+ self.failUnless(len(result) == len(check))
+ for v in result:
+ self.failUnless(v in check)
+ if j >= len(self.l):
+ j -= len(self.l)
+ i += 1
+ if i == len(self.l):
+ self.lastDefer.callback(1)
+ else:
+ d = self.l[i].getValue(sha.new(str(self.startport+j)).digest())
+ check = []
+ for k in range(self.startport+j, (self.startport+j)*(j+1)+1, self.startport+j):
+ check.append(str(k))
+ d.addCallback(self.get_values, check, i, j + random.randrange(1, min(len(self.l), 10)))
+
+ def store_join(self, result, next_node):
+ d = self.l[next_node].join()
+ if next_node + 1 < len(self.l):
+ d.addCallback(self.store_join, next_node + 1)
+ else:
+ d.addCallback(self.store_values)
+
+ def test_store(self):
+ from twisted.internet.base import DelayedCall
+ DelayedCall.debug = True
+ self.lastDefer = defer.Deferred()
+ d = self.l[0].join()
+ d.addCallback(self.store_join, 1)
+ return self.lastDefer
+
+ def tearDown(self):
+ for i in self.l:
+ try:
+ i.leave()
+ os.unlink(i.khashmir.store.db)
+ except:
+ pass
--- /dev/null
+
+"""The apt-p2p implementation of the Khashmir DHT.
+
+These modules implement a modified Khashmir, which is a Kademlia-like
+Distributed Hash Table available at::
+
+ http://khashmir.sourceforge.net/
+
+The protocol for the implementation's communication is described here::
+
+ http://www.camrdale.org/apt-p2p/protocol.html
+
+To run the DHT you probably want to do something like::
+
+ from apt_p2p_Khashmir import DHT
+ myDHT = DHT.DHT()
+ myDHT.loadConfig(config, section)
+ myDHT.join()
+
+at which point you should be up and running and connected to others in the DHT.
+
+"""
--- /dev/null
+## Copyright 2002-2004 Andrew Loewenstern, All Rights Reserved
+# see LICENSE.txt for license information
+
+"""Details of how to perform actions on remote peers."""
+
+from twisted.internet import reactor
+from twisted.python import log
+
+from khash import intify
+from util import uncompact
+
+class ActionBase:
+ """Base class for some long running asynchronous proccesses like finding nodes or values.
+
+ @type caller: L{khashmir.Khashmir}
+ @ivar caller: the DHT instance that is performing the action
+ @type target: C{string}
+ @ivar target: the target of the action, usually a DHT key
+ @type config: C{dictionary}
+ @ivar config: the configuration variables for the DHT
+ @type action: C{string}
+ @ivar action: the name of the action to call on remote nodes
+ @type num: C{long}
+ @ivar num: the target key in integer form
+ @type queried: C{dictionary}
+ @ivar queried: the nodes that have been queried for this action,
+        keys are node IDs, values are the nodes themselves
+ @type answered: C{dictionary}
+ @ivar answered: the nodes that have answered the queries
+ @type found: C{dictionary}
+ @ivar found: nodes that have been found so far by the action
+ @type sorted_nodes: C{list} of L{node.Node}
+    @ivar sorted_nodes: a list of nodes sorted by their proximity to the key
+ @type results: C{dictionary}
+ @ivar results: keys are the results found so far by the action
+ @type desired_results: C{int}
+ @ivar desired_results: the minimum number of results that are needed
+ before the action should stop
+ @type callback: C{method}
+ @ivar callback: the method to call with the results
+ @type outstanding: C{int}
+ @ivar outstanding: the number of requests currently outstanding
+ @type outstanding_results: C{int}
+ @ivar outstanding_results: the number of results that are expected from
+ the requests that are currently outstanding
+ @type finished: C{boolean}
+ @ivar finished: whether the action is done
+ @type sort: C{method}
+ @ivar sort: used to sort nodes by their proximity to the target
+ """
+
+ def __init__(self, caller, target, callback, config, action, num_results = None):
+ """Initialize the action.
+
+ @type caller: L{khashmir.Khashmir}
+ @param caller: the DHT instance that is performing the action
+ @type target: C{string}
+ @param target: the target of the action, usually a DHT key
+ @type callback: C{method}
+ @param callback: the method to call with the results
+ @type config: C{dictionary}
+ @param config: the configuration variables for the DHT
+ @type action: C{string}
+ @param action: the name of the action to call on remote nodes
+ @type num_results: C{int}
+ @param num_results: the minimum number of results that are needed before
+ the action should stop (optional, defaults to getting all the results)
+
+ """
+
+ self.caller = caller
+ self.target = target
+ self.config = config
+ self.action = action
+ self.num = intify(target)
+ self.queried = {}
+ self.answered = {}
+ self.found = {}
+ self.sorted_nodes = []
+ self.results = {}
+ self.desired_results = num_results
+ self.callback = callback
+ self.outstanding = 0
+ self.outstanding_results = 0
+ self.finished = False
+
+ def sort(a, b, num=self.num):
+ """Sort nodes relative to the ID we are looking for."""
+ x, y = num ^ a.num, num ^ b.num
+ if x > y:
+ return 1
+ elif x < y:
+ return -1
+ return 0
+ self.sort = sort
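+
+        # For example (editor's note): with a target of 0b1100, a node whose
+        # ID number is 0b1000 (XOR distance 0b0100) sorts ahead of one whose
+        # ID number is 0b0011 (XOR distance 0b1111).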
+
+ #{ Main operation
+ def goWithNodes(self, nodes):
+ """Start the action's process with a list of nodes to contact."""
+ for node in nodes:
+ if node.id == self.caller.node.id:
+ continue
+ else:
+ self.found[node.id] = node
+ self.sortNodes()
+ self.schedule()
+
+ def schedule(self):
+ """Schedule requests to be sent to remote nodes."""
+ # Check if we are already done
+ if self.desired_results and ((len(self.results) >= abs(self.desired_results)) or
+ (self.desired_results < 0 and
+ len(self.answered) >= self.config['STORE_REDUNDANCY'])):
+ self.finished = True
+ result = self.generateResult()
+ reactor.callLater(0, self.callback, *result)
+
+ if self.finished or (self.desired_results and
+ len(self.results) + self.outstanding_results >= abs(self.desired_results)):
+ return
+
+ # Loop for each node that should be processed
+ for node in self.getNodesToProcess():
+ # Don't send requests twice or to ourself
+ if node.id not in self.queried and node.id != self.caller.node.id:
+ self.queried[node.id] = 1
+
+ # Get the action to call on the node
+ try:
+ f = getattr(node, self.action)
+ except AttributeError:
+ log.msg("%s doesn't have a %s method!" % (node, self.action))
+ else:
+ # Get the arguments to the action's method
+ try:
+ args, expected_results = self.generateArgs(node)
+ except ValueError:
+ pass
+ else:
+ # Call the action on the remote node
+ self.outstanding += 1
+ self.outstanding_results += expected_results
+ df = f(self.caller.node.id, *args)
+ df.addCallbacks(self.gotResponse, self.actionFailed,
+ callbackArgs = (node, expected_results),
+ errbackArgs = (node, expected_results))
+
+ # We might have to stop for now
+ if (self.outstanding >= self.config['CONCURRENT_REQS'] or
+ (self.desired_results and
+ len(self.results) + self.outstanding_results >= abs(self.desired_results))):
+ break
+
+ assert self.outstanding >= 0
+ assert self.outstanding_results >= 0
+
+ # If no requests are outstanding, then we are done
+ if self.outstanding == 0:
+ self.finished = True
+ result = self.generateResult()
+ reactor.callLater(0, self.callback, *result)
+
+ def gotResponse(self, dict, node, expected_results):
+ """Receive a response from a remote node."""
+ self.caller.insertNode(node)
+ if self.finished or self.answered.has_key(node.id):
+ # a day late and a dollar short
+ return
+ self.outstanding -= 1
+ self.outstanding_results -= expected_results
+ self.answered[node.id] = 1
+ self.processResponse(dict['rsp'])
+ self.schedule()
+
+ def actionFailed(self, err, node, expected_results):
+ """Receive an error from a remote node."""
+ log.msg("action %s failed (%s) %s/%s" % (self.action, self.config['PORT'], node.host, node.port))
+ log.err(err)
+ self.caller.table.nodeFailed(node)
+ self.outstanding -= 1
+ self.outstanding_results -= expected_results
+ self.schedule()
+
+ def handleGotNodes(self, nodes):
+ """Process any received node contact info in the response.
+
+ Not called by default, but suitable for being called by
+ L{processResponse} in a recursive node search.
+ """
+ for compact_node in nodes:
+ node_contact = uncompact(compact_node)
+ node = self.caller.Node(node_contact)
+ if not self.found.has_key(node.id):
+ self.found[node.id] = node
+
+ def sortNodes(self):
+ """Sort the nodes, if necessary.
+
+ Assumes nodes are never removed from the L{found} dictionary.
+ """
+ if len(self.sorted_nodes) != len(self.found):
+ self.sorted_nodes = self.found.values()
+ self.sorted_nodes.sort(self.sort)
+
+ #{ Subclass for specific actions
+ def getNodesToProcess(self):
+ """Generate a list of nodes to process next.
+
+        This implementation is suitable for a recursive search over all nodes.
+ """
+ self.sortNodes()
+ return self.sorted_nodes[:self.config['K']]
+
+ def generateArgs(self, node):
+ """Generate the arguments to the node's action.
+
+ These arguments will be appended to our node ID when calling the action.
+ Also return the number of results expected from this request.
+
+ @raise ValueError: if the node should not be queried
+ """
+ return (self.target, ), 0
+
+ def processResponse(self, dict):
+ """Process the response dictionary received from the remote node."""
+ self.handleGotNodes(dict['nodes'])
+
+    def generateResult(self):
+ """Create the final result to return to the L{callback} function."""
+ return []
+
+
+class FindNode(ActionBase):
+ """Find the closest nodes to the key."""
+
+ def __init__(self, caller, target, callback, config, action="findNode"):
+ ActionBase.__init__(self, caller, target, callback, config, action)
+
+ def processResponse(self, dict):
+ """Save the token received from each node."""
+ if dict["id"] in self.found:
+ self.found[dict["id"]].updateToken(dict.get('token', ''))
+ self.handleGotNodes(dict['nodes'])
+
+ def generateResult(self):
+ """Result is the K closest nodes to the target."""
+ self.sortNodes()
+ return (self.sorted_nodes[:self.config['K']], )
+
+
+class FindValue(ActionBase):
+ """Find the closest nodes to the key and check for values."""
+
+ def __init__(self, caller, target, callback, config, action="findValue"):
+ ActionBase.__init__(self, caller, target, callback, config, action)
+
+ def processResponse(self, dict):
+ """Save the number of values each node has."""
+ if dict["id"] in self.found:
+ self.found[dict["id"]].updateNumValues(dict.get('num', 0))
+ self.handleGotNodes(dict['nodes'])
+
+ def generateResult(self):
+ """Result is the nodes that have values, sorted by proximity to the key."""
+ self.sortNodes()
+ return ([node for node in self.sorted_nodes if node.num_values > 0], )
+
+
+class GetValue(ActionBase):
+ """Retrieve values from a list of nodes."""
+
+ def __init__(self, caller, target, local_results, num_results, callback, config, action="getValue"):
+ """Initialize the action with the locally available results.
+
+ @type local_results: C{list} of C{string}
+ @param local_results: the values that were available in this node
+ """
+ ActionBase.__init__(self, caller, target, callback, config, action, num_results)
+ if local_results:
+ for result in local_results:
+ self.results[result] = 1
+
+ def getNodesToProcess(self):
+ """Nodes are never added, always return the same sorted node list."""
+ return self.sorted_nodes
+
+ def generateArgs(self, node):
+ """Arguments include the number of values to request."""
+ if node.num_values > 0:
+            # Request only as many results as are still needed; requesting
+            # 0 values tells the node to return everything it has.
+ num_values = abs(self.desired_results) - len(self.results)
+ assert num_values > 0
+ if num_values > node.num_values:
+ num_values = 0
+ return (self.target, num_values), node.num_values
+ else:
+ raise ValueError, "Don't try and get values from this node because it doesn't have any"
+
+    def processResponse(self, dict):
+        """Save the returned values, calling the L{callback} each time there are new ones."""
+        if dict.has_key('values'):
+            # Keep only the values that haven't been seen before
+            new_values = []
+            for value in dict['values']:
+                if not self.results.has_key(value):
+                    self.results[value] = 1
+                    new_values.append(value)
+            if len(new_values):
+                reactor.callLater(0, self.callback, self.target, new_values)
+
+ def generateResult(self):
+ """Results have all been returned, now send the empty list to end the action."""
+ return (self.target, [])
+
+
+class StoreValue(ActionBase):
+ """Store a value in a list of nodes."""
+
+ def __init__(self, caller, target, value, num_results, callback, config, action="storeValue"):
+ """Initialize the action with the value to store.
+
+ @type value: C{string}
+ @param value: the value to store in the nodes
+ """
+ ActionBase.__init__(self, caller, target, callback, config, action, num_results)
+ self.value = value
+
+ def getNodesToProcess(self):
+ """Nodes are never added, always return the same sorted list."""
+ return self.sorted_nodes
+
+ def generateArgs(self, node):
+ """Args include the value to store and the node's token."""
+ if node.token:
+ return (self.target, self.value, node.token), 1
+ else:
+ raise ValueError, "Don't store at this node since we don't know it's token"
+
+ def processResponse(self, dict):
+ """Save the response, though it should be nothin but the ID."""
+ self.results[dict["id"]] = dict
+
+ def generateResult(self):
+ """Return all the response IDs received."""
+ return (self.target, self.value, self.results.values())
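+
+# Editor's note: these action classes are driven by L{khashmir.Khashmir}. A
+# hedged sketch of how a node lookup might be started there (the attribute
+# names are assumptions, not verbatim from that module):
+#
+#     nodes = self.table.findNodes(key)
+#     state = FindNode(self, key, callback, self.config)
+#     reactor.callLater(0, state.goWithNodes, nodes)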
--- /dev/null
+
+"""Functions for bencoding and bdecoding data.
+
+@type decode_func: C{dictionary} of C{function}
+@var decode_func: a dictionary of function calls to be made, based on data,
+ the keys are the first character of the data and the value is the
+ function to use to decode that data
+@type bencached_marker: C{list}
+@var bencached_marker: a unique mutable marker used to verify that data
+    was encoded by the L{Bencached} class
+@type encode_func: C{dictionary} of C{function}
+@var encode_func: a dictionary of function calls to be made, based on data,
+ the keys are the type of the data and the value is the
+ function to use to encode that data
+@type BencachedType: C{type}
+@var BencachedType: the L{Bencached} type
+"""
+
+from types import IntType, LongType, StringType, ListType, TupleType, DictType, BooleanType
+try:
+ from types import UnicodeType
+except ImportError:
+ UnicodeType = None
+from datetime import datetime
+import time
+
+from twisted.python import log
+from twisted.trial import unittest
+
+class BencodeError(ValueError):
+ pass
+
+def decode_int(x, f):
+ """Bdecode an integer.
+
+ @type x: C{string}
+ @param x: the data to decode
+ @type f: C{int}
+ @param f: the offset in the data to start at
+ @rtype: C{int}, C{int}
+ @return: the bdecoded integer, and the offset to read next
+ @raise BencodeError: if the data is improperly encoded
+
+ """
+
+ f += 1
+ newf = x.index('e', f)
+ try:
+ n = int(x[f:newf])
+    except (OverflowError, ValueError):
+ n = long(x[f:newf])
+ if x[f] == '-':
+ if x[f + 1] == '0':
+ raise BencodeError, "integer has a leading zero after a negative sign"
+ elif x[f] == '0' and newf != f+1:
+ raise BencodeError, "integer has a leading zero"
+ return (n, newf+1)
+
+def decode_string(x, f):
+ """Bdecode a string.
+
+ @type x: C{string}
+ @param x: the data to decode
+ @type f: C{int}
+ @param f: the offset in the data to start at
+ @rtype: C{string}, C{int}
+ @return: the bdecoded string, and the offset to read next
+ @raise BencodeError: if the data is improperly encoded
+
+ """
+
+ colon = x.index(':', f)
+ try:
+ n = int(x[f:colon])
+ except (OverflowError, ValueError):
+ n = long(x[f:colon])
+ if x[f] == '0' and colon != f+1:
+ raise BencodeError, "string length has a leading zero"
+ colon += 1
+ return (x[colon:colon+n], colon+n)
+
+def decode_unicode(x, f):
+ """Bdecode a unicode string.
+
+ @type x: C{string}
+ @param x: the data to decode
+ @type f: C{int}
+ @param f: the offset in the data to start at
+ @rtype: C{int}, C{int}
+ @return: the bdecoded unicode string, and the offset to read next
+
+ """
+
+ s, f = decode_string(x, f+1)
+ return (s.decode('UTF-8'),f)
+
+def decode_datetime(x, f):
+ """Bdecode a datetime value.
+
+ @type x: C{string}
+ @param x: the data to decode
+ @type f: C{int}
+ @param f: the offset in the data to start at
+ @rtype: C{datetime.datetime}, C{int}
+ @return: the bdecoded integer, and the offset to read next
+ @raise BencodeError: if the data is improperly encoded
+
+ """
+
+ f += 1
+ newf = x.index('e', f)
+ try:
+ date = datetime(*(time.strptime(x[f:newf], '%Y-%m-%dT%H:%M:%S')[0:6]))
+    except ValueError:
+ raise BencodeError, "datetime value could not be decoded: %s" % x[f:newf]
+ return (date, newf+1)
+
+def decode_list(x, f):
+ """Bdecode a list.
+
+ @type x: C{string}
+ @param x: the data to decode
+ @type f: C{int}
+ @param f: the offset in the data to start at
+ @rtype: C{list}, C{int}
+ @return: the bdecoded list, and the offset to read next
+
+ """
+
+ r, f = [], f+1
+ while x[f] != 'e':
+ v, f = decode_func[x[f]](x, f)
+ r.append(v)
+ return (r, f + 1)
+
+def decode_dict(x, f):
+ """Bdecode a dictionary.
+
+ @type x: C{string}
+ @param x: the data to decode
+ @type f: C{int}
+ @param f: the offset in the data to start at
+ @rtype: C{dictionary}, C{int}
+ @return: the bdecoded dictionary, and the offset to read next
+ @raise BencodeError: if the data is improperly encoded
+
+ """
+
+ r, f = {}, f+1
+ lastkey = None
+ while x[f] != 'e':
+ k, f = decode_string(x, f)
+ if lastkey >= k:
+ raise BencodeError, "dictionary keys must be in sorted order"
+ lastkey = k
+ r[k], f = decode_func[x[f]](x, f)
+ return (r, f + 1)
+
+decode_func = {}
+decode_func['l'] = decode_list
+decode_func['d'] = decode_dict
+decode_func['i'] = decode_int
+decode_func['0'] = decode_string
+decode_func['1'] = decode_string
+decode_func['2'] = decode_string
+decode_func['3'] = decode_string
+decode_func['4'] = decode_string
+decode_func['5'] = decode_string
+decode_func['6'] = decode_string
+decode_func['7'] = decode_string
+decode_func['8'] = decode_string
+decode_func['9'] = decode_string
+decode_func['u'] = decode_unicode
+decode_func['t'] = decode_datetime
+
+def bdecode(x, sloppy = False):
+ """Bdecode a string of data.
+
+ @type x: C{string}
+ @param x: the data to decode
+ @type sloppy: C{boolean}
+ @param sloppy: whether to allow errors in the decoding
+ @rtype: unknown
+ @return: the bdecoded data
+ @raise BencodeError: if the data is improperly encoded
+
+ """
+
+ try:
+ r, l = decode_func[x[0]](x, 0)
+ except (IndexError, KeyError, ValueError):
+ raise BencodeError, "bad bencoded data"
+ if not sloppy and l != len(x):
+ raise BencodeError, "bad bencoded data, all could not be decoded"
+ return r
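+
+# A worked example (editor's note, taken from the tests below):
+# bdecode('d3:agei25e4:eyes4:bluee') returns {'age': 25, 'eyes': 'blue'},
+# and bencode() performs the inverse transformation.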
+
+bencached_marker = []
+
+class Bencached(object):
+ """Dummy data structure for storing bencoded data in memory.
+
+ @type marker: C{list}
+ @ivar marker: mutable type to make sure the data was encoded by this class
+ @type bencoded: C{string}
+ @ivar bencoded: the bencoded data stored in a string
+
+ """
+
+ def __init__(self, s):
+ """
+
+ @type s: C{string}
+ @param s: the new bencoded data to store
+
+ """
+
+ self.marker = bencached_marker
+ self.bencoded = s
+
+BencachedType = type(Bencached('')) # insufficient, but good as a filter
+
+def encode_bencached(x,r):
+ """Bencode L{Bencached} data.
+
+ @type x: L{Bencached}
+ @param x: the data to encode
+ @type r: C{list}
+ @param r: the currently bencoded data, to which the bencoding of x
+ will be appended
+
+ """
+
+ assert x.marker == bencached_marker
+ r.append(x.bencoded)
+
+def encode_int(x,r):
+ """Bencode an integer.
+
+ @type x: C{int}
+ @param x: the data to encode
+ @type r: C{list}
+ @param r: the currently bencoded data, to which the bencoding of x
+ will be appended
+
+ """
+
+ r.extend(('i',str(x),'e'))
+
+def encode_bool(x,r):
+ """Bencode a boolean.
+
+ @type x: C{boolean}
+ @param x: the data to encode
+ @type r: C{list}
+ @param r: the currently bencoded data, to which the bencoding of x
+ will be appended
+
+ """
+
+ encode_int(int(x),r)
+
+def encode_string(x,r):
+ """Bencode a string.
+
+ @type x: C{string}
+ @param x: the data to encode
+ @type r: C{list}
+ @param r: the currently bencoded data, to which the bencoding of x
+ will be appended
+
+ """
+
+ r.extend((str(len(x)),':',x))
+
+def encode_unicode(x,r):
+ """Bencode a unicode string.
+
+ @type x: C{unicode}
+ @param x: the data to encode
+ @type r: C{list}
+ @param r: the currently bencoded data, to which the bencoding of x
+ will be appended
+
+ """
+
+ #r.append('u')
+ encode_string(x.encode('UTF-8'),r)
+
+def encode_datetime(x,r):
+ """Bencode a datetime value in UTC.
+
+ If the datetime object has time zone info, it is converted to UTC time.
+ Otherwise it is assumed that the time is already in UTC time.
+ Microseconds are removed.
+
+ @type x: C{datetime.datetime}
+ @param x: the data to encode
+ @type r: C{list}
+ @param r: the currently bencoded data, to which the bencoding of x
+ will be appended
+
+ """
+
+ date = x.replace(microsecond = 0)
+ offset = date.utcoffset()
+ if offset is not None:
+ utcdate = date.replace(tzinfo = None) + offset
+ else:
+ utcdate = date
+ r.extend(('t',utcdate.isoformat(),'e'))
+
+def encode_list(x,r):
+ """Bencode a list.
+
+ @type x: C{list}
+ @param x: the data to encode
+ @type r: C{list}
+ @param r: the currently bencoded data, to which the bencoding of x
+ will be appended
+
+ """
+
+ r.append('l')
+ for e in x:
+ encode_func[type(e)](e, r)
+ r.append('e')
+
+def encode_dict(x,r):
+ """Bencode a dictionary.
+
+ @type x: C{dictionary}
+ @param x: the data to encode
+ @type r: C{list}
+ @param r: the currently bencoded data, to which the bencoding of x
+ will be appended
+
+ """
+
+ r.append('d')
+ ilist = x.items()
+ ilist.sort()
+ for k,v in ilist:
+ r.extend((str(len(k)),':',k))
+ encode_func[type(v)](v, r)
+ r.append('e')
+
+encode_func = {}
+encode_func[BencachedType] = encode_bencached
+encode_func[IntType] = encode_int
+encode_func[LongType] = encode_int
+encode_func[StringType] = encode_string
+encode_func[ListType] = encode_list
+encode_func[TupleType] = encode_list
+encode_func[DictType] = encode_dict
+encode_func[BooleanType] = encode_bool
+encode_func[datetime] = encode_datetime
+if UnicodeType:
+ encode_func[UnicodeType] = encode_unicode
+
+def bencode(x):
+ """Bencode some data.
+
+ @type x: unknown
+ @param x: the data to encode
+ @rtype: string
+ @return: the bencoded data
+ @raise BencodeError: if the data contains a type that cannot be encoded
+
+ """
+ r = []
+ try:
+ encode_func[type(x)](x, r)
+ except:
+ raise BencodeError, "failed to bencode the data"
+ return ''.join(r)
+
+class TestBencode(unittest.TestCase):
+ """Test the bencoding and bdecoding of data."""
+
+ timeout = 2
+
+ def test_bdecode_string(self):
+ self.failUnlessRaises(BencodeError, bdecode, '0:0:')
+ self.failUnlessRaises(BencodeError, bdecode, '')
+ self.failUnlessRaises(BencodeError, bdecode, '35208734823ljdahflajhdf')
+ self.failUnlessRaises(BencodeError, bdecode, '2:abfdjslhfld')
+ self.failUnlessEqual(bdecode('0:'), '')
+ self.failUnlessEqual(bdecode('3:abc'), 'abc')
+ self.failUnlessEqual(bdecode('10:1234567890'), '1234567890')
+ self.failUnlessRaises(BencodeError, bdecode, '02:xy')
+ self.failUnlessRaises(BencodeError, bdecode, '9999:x')
+
+ def test_bdecode_int(self):
+ self.failUnlessRaises(BencodeError, bdecode, 'ie')
+ self.failUnlessRaises(BencodeError, bdecode, 'i341foo382e')
+ self.failUnlessEqual(bdecode('i4e'), 4L)
+ self.failUnlessEqual(bdecode('i0e'), 0L)
+ self.failUnlessEqual(bdecode('i123456789e'), 123456789L)
+ self.failUnlessEqual(bdecode('i-10e'), -10L)
+ self.failUnlessRaises(BencodeError, bdecode, 'i-0e')
+ self.failUnlessRaises(BencodeError, bdecode, 'i123')
+ self.failUnlessRaises(BencodeError, bdecode, 'i6easd')
+ self.failUnlessRaises(BencodeError, bdecode, 'i03e')
+
+ def test_bdecode_list(self):
+ self.failUnlessRaises(BencodeError, bdecode, 'l')
+ self.failUnlessEqual(bdecode('le'), [])
+ self.failUnlessRaises(BencodeError, bdecode, 'leanfdldjfh')
+ self.failUnlessEqual(bdecode('l0:0:0:e'), ['', '', ''])
+ self.failUnlessRaises(BencodeError, bdecode, 'relwjhrlewjh')
+ self.failUnlessEqual(bdecode('li1ei2ei3ee'), [1, 2, 3])
+ self.failUnlessEqual(bdecode('l3:asd2:xye'), ['asd', 'xy'])
+ self.failUnlessEqual(bdecode('ll5:Alice3:Bobeli2ei3eee'), [['Alice', 'Bob'], [2, 3]])
+ self.failUnlessRaises(BencodeError, bdecode, 'l01:ae')
+ self.failUnlessRaises(BencodeError, bdecode, 'l0:')
+
+ def test_bdecode_dict(self):
+ self.failUnlessRaises(BencodeError, bdecode, 'd')
+ self.failUnlessRaises(BencodeError, bdecode, 'defoobar')
+ self.failUnlessEqual(bdecode('de'), {})
+ self.failUnlessEqual(bdecode('d3:agei25e4:eyes4:bluee'), {'age': 25, 'eyes': 'blue'})
+ self.failUnlessEqual(bdecode('d8:spam.mp3d6:author5:Alice6:lengthi100000eee'),
+ {'spam.mp3': {'author': 'Alice', 'length': 100000}})
+ self.failUnlessRaises(BencodeError, bdecode, 'd3:fooe')
+ self.failUnlessRaises(BencodeError, bdecode, 'di1e0:e')
+ self.failUnlessRaises(BencodeError, bdecode, 'd1:b0:1:a0:e')
+ self.failUnlessRaises(BencodeError, bdecode, 'd1:a0:1:a0:e')
+ self.failUnlessRaises(BencodeError, bdecode, 'd0:0:')
+ self.failUnlessRaises(BencodeError, bdecode, 'd0:')
+
+ def test_bdecode_unicode(self):
+ self.failUnlessRaises(BencodeError, bdecode, 'u0:0:')
+ self.failUnlessRaises(BencodeError, bdecode, 'u')
+ self.failUnlessRaises(BencodeError, bdecode, 'u35208734823ljdahflajhdf')
+ self.failUnlessRaises(BencodeError, bdecode, 'u2:abfdjslhfld')
+ self.failUnlessEqual(bdecode('u0:'), '')
+ self.failUnlessEqual(bdecode('u3:abc'), 'abc')
+ self.failUnlessEqual(bdecode('u10:1234567890'), '1234567890')
+ self.failUnlessRaises(BencodeError, bdecode, 'u02:xy')
+ self.failUnlessRaises(BencodeError, bdecode, 'u9999:x')
+
+ def test_bencode_int(self):
+ self.failUnlessEqual(bencode(4), 'i4e')
+ self.failUnlessEqual(bencode(0), 'i0e')
+ self.failUnlessEqual(bencode(-10), 'i-10e')
+ self.failUnlessEqual(bencode(12345678901234567890L), 'i12345678901234567890e')
+
+ def test_bencode_string(self):
+ self.failUnlessEqual(bencode(''), '0:')
+ self.failUnlessEqual(bencode('abc'), '3:abc')
+ self.failUnlessEqual(bencode('1234567890'), '10:1234567890')
+
+ def test_bencode_list(self):
+ self.failUnlessEqual(bencode([]), 'le')
+ self.failUnlessEqual(bencode([1, 2, 3]), 'li1ei2ei3ee')
+ self.failUnlessEqual(bencode([['Alice', 'Bob'], [2, 3]]), 'll5:Alice3:Bobeli2ei3eee')
+
+ def test_bencode_dict(self):
+ self.failUnlessEqual(bencode({}), 'de')
+ self.failUnlessEqual(bencode({'age': 25, 'eyes': 'blue'}), 'd3:agei25e4:eyes4:bluee')
+ self.failUnlessEqual(bencode({'spam.mp3': {'author': 'Alice', 'length': 100000}}),
+ 'd8:spam.mp3d6:author5:Alice6:lengthi100000eee')
+ self.failUnlessRaises(BencodeError, bencode, {1: 'foo'})
+
+ def test_bencode_unicode(self):
+ self.failUnlessEqual(bencode(u''), '0:')
+ self.failUnlessEqual(bencode(u'abc'), '3:abc')
+ self.failUnlessEqual(bencode(u'1234567890'), '10:1234567890')
+
+ def test_bool(self):
+ self.failUnless(bdecode(bencode(True)))
+ self.failIf(bdecode(bencode(False)))
+
+ def test_datetime(self):
+ date = datetime.utcnow()
+ self.failUnlessEqual(bdecode(bencode(date)), date.replace(microsecond = 0))
+
+ if UnicodeType == None:
+ test_bencode_unicode.skip = "Python was not compiled with unicode support"
+ test_bdecode_unicode.skip = "Python was not compiled with unicode support"
--- /dev/null
+
+"""An sqlite database for storing nodes and key/value pairs."""
+
+from datetime import datetime, timedelta
+from pysqlite2 import dbapi2 as sqlite
+from binascii import a2b_base64, b2a_base64
+from time import sleep
+import os
+
+from twisted.trial import unittest
+
+class DBExcept(Exception):
+ pass
+
+class khash(str):
+ """Dummy class to convert all hashes to base64 for storing in the DB."""
+
+class dht_value(str):
+ """Dummy class to convert all DHT values to base64 for storing in the DB."""
+
+# Initialize the database to work with 'khash' objects (binary strings)
+sqlite.register_adapter(khash, b2a_base64)
+sqlite.register_converter("KHASH", a2b_base64)
+sqlite.register_converter("khash", a2b_base64)
+
+# Initialize the database to work with DHT values (binary strings)
+sqlite.register_adapter(dht_value, b2a_base64)
+sqlite.register_converter("DHT_VALUE", a2b_base64)
+sqlite.register_converter("dht_value", a2b_base64)
+
+class DB:
+ """An sqlite database for storing persistent node info and key/value pairs.
+
+ @type db: C{string}
+ @ivar db: the database file to use
+ @type conn: L{pysqlite2.dbapi2.Connection}
+ @ivar conn: an open connection to the sqlite database
+ """
+
+ def __init__(self, db):
+ """Load or create the database file.
+
+ @type db: C{string}
+ @param db: the database file to use
+ """
+ self.db = db
+ try:
+ os.stat(db)
+ except OSError:
+ self._createNewDB(db)
+ else:
+ self._loadDB(db)
+ if sqlite.version_info < (2, 1):
+ sqlite.register_converter("TEXT", str)
+ sqlite.register_converter("text", str)
+ else:
+ self.conn.text_factory = str
+
+ #{ Loading the DB
+ def _loadDB(self, db):
+ """Open a new connection to the existing database file"""
+ try:
+ self.conn = sqlite.connect(database=db, detect_types=sqlite.PARSE_DECLTYPES)
+ except:
+ import traceback
+ raise DBExcept, "Couldn't open DB", traceback.format_exc()
+
+ def _createNewDB(self, db):
+ """Open a connection to a new database and create the necessary tables."""
+ self.conn = sqlite.connect(database=db, detect_types=sqlite.PARSE_DECLTYPES)
+ c = self.conn.cursor()
+ c.execute("CREATE TABLE kv (key KHASH, value DHT_VALUE, last_refresh TIMESTAMP, "+
+ "PRIMARY KEY (key, value))")
+ c.execute("CREATE INDEX kv_key ON kv(key)")
+ c.execute("CREATE INDEX kv_last_refresh ON kv(last_refresh)")
+ c.execute("CREATE TABLE nodes (id KHASH PRIMARY KEY, host TEXT, port NUMBER)")
+ c.execute("CREATE TABLE self (num NUMBER PRIMARY KEY, id KHASH)")
+ self.conn.commit()
+
+ def close(self):
+ self.conn.close()
+
+ #{ This node's ID
+ def getSelfNode(self):
+ """Retrieve this node's ID from a previous run of the program."""
+ c = self.conn.cursor()
+ c.execute('SELECT id FROM self WHERE num = 0')
+ id = c.fetchone()
+ if id:
+ return id[0]
+ else:
+ return None
+
+ def saveSelfNode(self, id):
+ """Store this node's ID for a subsequent run of the program."""
+ c = self.conn.cursor()
+ c.execute("INSERT OR REPLACE INTO self VALUES (0, ?)", (khash(id),))
+ self.conn.commit()
+
+ #{ Routing table
+ def dumpRoutingTable(self, buckets):
+ """Save routing table nodes to the database."""
+ c = self.conn.cursor()
+ c.execute("DELETE FROM nodes WHERE id NOT NULL")
+ for bucket in buckets:
+ for node in bucket.l:
+ c.execute("INSERT INTO nodes VALUES (?, ?, ?)", (khash(node.id), node.host, node.port))
+ self.conn.commit()
+
+ def getRoutingTable(self):
+ """Load routing table nodes from database."""
+ c = self.conn.cursor()
+ c.execute("SELECT * FROM nodes")
+ return c.fetchall()
+
+ #{ Key/value pairs
+ def retrieveValues(self, key):
+ """Retrieve values from the database."""
+ c = self.conn.cursor()
+ c.execute("SELECT value FROM kv WHERE key = ?", (khash(key),))
+ l = []
+ rows = c.fetchall()
+ for row in rows:
+ l.append(row[0])
+ return l
+
+ def countValues(self, key):
+ """Count the number of values in the database."""
+ c = self.conn.cursor()
+ c.execute("SELECT COUNT(value) as num_values FROM kv WHERE key = ?", (khash(key),))
+ res = 0
+ row = c.fetchone()
+ if row:
+ res = row[0]
+ return res
+
+ def storeValue(self, key, value):
+ """Store or update a key and value."""
+ c = self.conn.cursor()
+ c.execute("INSERT OR REPLACE INTO kv VALUES (?, ?, ?)",
+ (khash(key), dht_value(value), datetime.now()))
+ self.conn.commit()
+
+ def expireValues(self, expireAfter):
+ """Expire older values after expireAfter seconds."""
+ t = datetime.now() - timedelta(seconds=expireAfter)
+ c = self.conn.cursor()
+ c.execute("DELETE FROM kv WHERE last_refresh < ?", (t, ))
+ self.conn.commit()
+
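+# Editor's sketch of typical usage (the file path, key and value are made up):
+#
+#     store = DB('/tmp/example.db')
+#     store.storeValue(key, value)
+#     values = store.retrieveValues(key)
+#     store.close()
+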
+class TestDB(unittest.TestCase):
+ """Tests for the khashmir database."""
+
+ timeout = 5
+ db = '/tmp/khashmir.db'
+ key = '\xca\xec\xb8\x0c\x00\xe7\x07\xf8~])\x8f\x9d\xe5_B\xff\x1a\xc4!'
+
+ def setUp(self):
+ self.store = DB(self.db)
+
+ def test_selfNode(self):
+ self.store.saveSelfNode(self.key)
+ self.failUnlessEqual(self.store.getSelfNode(), self.key)
+
+ def test_Value(self):
+ self.store.storeValue(self.key, self.key)
+ val = self.store.retrieveValues(self.key)
+ self.failUnlessEqual(len(val), 1)
+ self.failUnlessEqual(val[0], self.key)
+
+ def test_expireValues(self):
+ self.store.storeValue(self.key, self.key)
+ sleep(2)
+ self.store.storeValue(self.key, self.key+self.key)
+ self.store.expireValues(1)
+ val = self.store.retrieveValues(self.key)
+ self.failUnlessEqual(len(val), 1)
+ self.failUnlessEqual(val[0], self.key+self.key)
+
+ def test_RoutingTable(self):
+ class dummy:
+ id = self.key
+ host = "127.0.0.1"
+ port = 9977
+ def contents(self):
+ return (self.id, self.host, self.port)
+ dummy2 = dummy()
+ dummy2.id = '\xaa\xbb\xcc\x0c\x00\xe7\x07\xf8~])\x8f\x9d\xe5_B\xff\x1a\xc4!'
+ dummy2.host = '205.23.67.124'
+ dummy2.port = 12345
+ class bl:
+ def __init__(self):
+ self.l = []
+ bl1 = bl()
+ bl1.l.append(dummy())
+ bl2 = bl()
+ bl2.l.append(dummy2)
+ buckets = [bl1, bl2]
+ self.store.dumpRoutingTable(buckets)
+ rt = self.store.getRoutingTable()
+ self.failUnlessIn(dummy().contents(), rt)
+ self.failUnlessIn(dummy2.contents(), rt)
+
+ def tearDown(self):
+ self.store.close()
+ os.unlink(self.db)
--- /dev/null
+## Copyright 2002-2003 Andrew Loewenstern, All Rights Reserved
+# see LICENSE.txt for license information
+
+"""Functions to deal with hashes (node IDs and keys)."""
+
+from sha import sha
+from os import urandom
+
+from twisted.trial import unittest
+
+def intify(hstr):
+ """Convert a hash (big-endian) to a long python integer."""
+ assert len(hstr) == 20
+ return long(hstr.encode('hex'), 16)
+
+def stringify(num):
+ """Convert a long python integer to a hash."""
+ hstr = hex(num)[2:]
+ if hstr[-1] == 'L':
+ hstr = hstr[:-1]
+ if len(hstr) % 2 != 0:
+ hstr = '0' + hstr
+ hstr = hstr.decode('hex')
+ return (20 - len(hstr)) * '\x00' + hstr
+
+def distance(a, b):
+ """Calculate the distance between two hashes expressed as strings."""
+ return intify(a) ^ intify(b)
+
+def newID():
+ """Get a new pseudorandom globally unique hash string."""
+ h = sha()
+ h.update(urandom(20))
+ return h.digest()
+
+def newIDInRange(min, max):
+ """Get a new pseudorandom globally unique hash string in the range."""
+ return stringify(randRange(min,max))
+
+def randRange(min, max):
+ """Get a new pseudorandom globally unique hash number in the range."""
+ return min + intify(newID()) % (max - min)
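+
+# A minimal usage sketch (illustration only): IDs round-trip through
+# intify/stringify, and distance is the Kademlia XOR metric, so any ID is
+# at distance zero from itself.
+#
+# >>> h = newID()
+# >>> stringify(intify(h)) == h
+# True
+# >>> distance(h, h)
+# 0L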
+
+def newTID():
+ """Get a new pseudorandom transaction ID number."""
+ return randRange(-2**30, 2**30)
+
+class TestNewID(unittest.TestCase):
+ """Test the newID function."""
+ def testLength(self):
+ self.failUnlessEqual(len(newID()), 20)
+ def testHundreds(self):
+ for x in xrange(100):
+ self.testLength()
+
+class TestIntify(unittest.TestCase):
+ """Test the intify function."""
+ known = [('\0' * 20, 0),
+ ('\xff' * 20, 2L**160 - 1),
+ ]
+ def testKnown(self):
+ for str, value in self.known:
+ self.failUnlessEqual(intify(str), value)
+ def testEndianessOnce(self):
+ h = newID()
+ while h[-1] == '\xff':
+ h = newID()
+ k = h[:-1] + chr(ord(h[-1]) + 1)
+ self.failUnlessEqual(intify(k) - intify(h), 1)
+ def testEndianessLots(self):
+ for x in xrange(100):
+ self.testEndianessOnce()
+
+class TestDistance(unittest.TestCase):
+ """Test the distance function."""
+ known = [
+ (("\0" * 20, "\xff" * 20), 2**160L -1),
+ ((sha("foo").digest(), sha("foo").digest()), 0),
+ ((sha("bar").digest(), sha("bar").digest()), 0)
+ ]
+ def testKnown(self):
+ for pair, dist in self.known:
+ self.failUnlessEqual(distance(pair[0], pair[1]), dist)
+ def testCommutative(self):
+ for i in xrange(100):
+ x, y, z = newID(), newID(), newID()
+ self.failUnlessEqual(distance(x,y) ^ distance(y, z), distance(x, z))
+
+class TestRandRange(unittest.TestCase):
+ """Test the randRange function."""
+ def testOnce(self):
+ a = intify(newID())
+ b = intify(newID())
+ if a < b:
+ c = randRange(a, b)
+ self.failUnlessEqual(a <= c < b, True, "output out of range %d %d %d" % (b, c, a))
+ else:
+ c = randRange(b, a)
+ self.failUnlessEqual(b <= c < a, True, "output out of range %d %d %d" % (b, c, a))
+
+ def testOneHundredTimes(self):
+ for i in xrange(100):
+ self.testOnce()
--- /dev/null
+## Copyright 2002-2004 Andrew Loewenstern, All Rights Reserved
+# see LICENSE.txt for license information
+
+"""The main Khashmir program."""
+
+import warnings
+warnings.simplefilter("ignore", DeprecationWarning)
+
+from datetime import datetime, timedelta
+from random import randrange, shuffle
+from sha import sha
+import os
+
+from twisted.internet.defer import Deferred
+from twisted.internet import protocol, reactor
+from twisted.trial import unittest
+
+from db import DB
+from ktable import KTable
+from knode import KNodeBase, KNodeRead, KNodeWrite, NULL_ID
+from khash import newID, newIDInRange
+from actions import FindNode, FindValue, GetValue, StoreValue
+import krpc
+
+class KhashmirBase(protocol.Factory):
+ """The base Khashmir class, with base functionality and find node, no key-value mappings.
+
+ @type _Node: L{node.Node}
+ @ivar _Node: the knode implementation to use for this class of DHT
+ @type config: C{dictionary}
+ @ivar config: the configuration parameters for the DHT
+ @type port: C{int}
+ @ivar port: the port to listen on
+ @type store: L{db.DB}
+ @ivar store: the database to store nodes and key/value pairs in
+ @type node: L{node.Node}
+ @ivar node: this node
+ @type table: L{ktable.KTable}
+ @ivar table: the routing table
+ @type token_secrets: C{list} of C{string}
+ @ivar token_secrets: the current secrets to use to create tokens
+ @type udp: L{krpc.hostbroker}
+ @ivar udp: the factory for the KRPC protocol
+ @type listenport: L{twisted.internet.interfaces.IListeningPort}
+ @ivar listenport: the UDP listening port
+ @type next_checkpoint: L{twisted.internet.interfaces.IDelayedCall}
+ @ivar next_checkpoint: the delayed call for the next checkpoint
+ """
+
+ _Node = KNodeBase
+
+ def __init__(self, config, cache_dir='/tmp'):
+ """Initialize the Khashmir class and call the L{setup} method.
+
+ @type config: C{dictionary}
+ @param config: the configuration parameters for the DHT
+ @type cache_dir: C{string}
+ @param cache_dir: the directory to store all files in
+ (optional, defaults to the /tmp directory)
+ """
+ self.config = None
+ self.setup(config, cache_dir)
+
+ def setup(self, config, cache_dir):
+ """Setup all the Khashmir sub-modules.
+
+ @type config: C{dictionary}
+ @param config: the configuration parameters for the DHT
+ @type cache_dir: C{string}
+ @param cache_dir: the directory to store all files in
+ """
+ self.config = config
+ self.port = config['PORT']
+ self.store = DB(os.path.join(cache_dir, 'khashmir.' + str(self.port) + '.db'))
+ self.node = self._loadSelfNode('', self.port)
+ self.table = KTable(self.node, config)
+ self.token_secrets = [newID()]
+
+ # Start listening
+ self.udp = krpc.hostbroker(self, config)
+ self.udp.protocol = krpc.KRPC
+ self.listenport = reactor.listenUDP(self.port, self.udp)
+
+ # Load the routing table and begin checkpointing
+ self._loadRoutingTable()
+ self.refreshTable(force = True)
+ self.next_checkpoint = reactor.callLater(60, self.checkpoint)
+
+ def Node(self, id, host = None, port = None):
+ """Create a new node.
+
+ @see: L{node.Node.__init__}
+ """
+ n = self._Node(id, host, port)
+ n.table = self.table
+ n.conn = self.udp.connectionForAddr((n.host, n.port))
+ return n
+
+ def __del__(self):
+ """Stop listening for packets."""
+ self.listenport.stopListening()
+
+ def _loadSelfNode(self, host, port):
+ """Create this node, loading any previously saved one."""
+ id = self.store.getSelfNode()
+ if not id:
+ id = newID()
+ return self._Node(id, host, port)
+
+ def checkpoint(self):
+ """Perform some periodic maintenance operations."""
+ # Create a new token secret
+ self.token_secrets.insert(0, newID())
+ if len(self.token_secrets) > 3:
+ self.token_secrets.pop()
+
+ # Save some parameters for reloading
+ self.store.saveSelfNode(self.node.id)
+ self.store.dumpRoutingTable(self.table.buckets)
+
+ # DHT maintenance
+ self.store.expireValues(self.config['KEY_EXPIRE'])
+ self.refreshTable()
+
+ self.next_checkpoint = reactor.callLater(randrange(int(self.config['CHECKPOINT_INTERVAL'] * .9),
+ int(self.config['CHECKPOINT_INTERVAL'] * 1.1)),
+ self.checkpoint)
+
+ def _loadRoutingTable(self):
+ """Load the previous routing table nodes from the database.
+
+ It's usually a good idea to call refreshTable(force = True) after
+ loading the table.
+ """
+ nodes = self.store.getRoutingTable()
+ for rec in nodes:
+ n = self.Node(rec[0], rec[1], int(rec[2]))
+ self.table.insertNode(n, contacted = False)
+
+ #{ Local interface
+ def addContact(self, host, port, callback=None, errback=None):
+ """Ping this node and add the contact info to the table on pong.
+
+ @type host: C{string}
+ @param host: the IP address of the node to contact
+ @type port: C{int}
+ @param port: the port of the node to contact
+ @type callback: C{method}
+ @param callback: the method to call with the results, it must take 1
+ parameter, the contact info returned by the node
+ (optional, defaults to doing nothing with the results)
+ @type errback: C{method}
+ @param errback: the method to call if an error occurs
+ (optional, defaults to calling the callback with None)
+ """
+ n = self.Node(NULL_ID, host, port)
+ self.sendJoin(n, callback=callback, errback=errback)
+
+ def findNode(self, id, callback, errback=None):
+ """Find the contact info for the K closest nodes in the global table.
+
+ @type id: C{string}
+ @param id: the target ID to find the K closest nodes of
+ @type callback: C{method}
+ @param callback: the method to call with the results, it must take 1
+ parameter, the list of K closest nodes
+ @type errback: C{method}
+ @param errback: the method to call if an error occurs
+ (optional, defaults to doing nothing when an error occurs)
+ """
+ # Get K nodes out of local table/cache
+ nodes = self.table.findNodes(id)
+ d = Deferred()
+ if errback:
+ d.addCallbacks(callback, errback)
+ else:
+ d.addCallback(callback)
+
+ # If the target ID was found
+ if len(nodes) == 1 and nodes[0].id == id:
+ d.callback(nodes)
+ else:
+ # Start the finding nodes action
+ state = FindNode(self, id, d.callback, self.config)
+ reactor.callLater(0, state.goWithNodes, nodes)
+
+ def insertNode(self, node, contacted = True):
+ """Try to insert a node in our local table, pinging oldest contact if necessary.
+
+ If all you have is a host/port, then use L{addContact}, which calls this
+ method after receiving the PONG from the remote node. The reason for
+ the seperation is we can't insert a node into the table without its
+ node ID. That means of course the node passed into this method needs
+ to be a properly formed Node object with a valid ID.
+
+ @type node: L{node.Node}
+ @param node: the new node to try and insert
+ @type contacted: C{boolean}
+ @param contacted: whether the new node is known to be good, i.e.
+ responded to a request (optional, defaults to True)
+ """
+ old = self.table.insertNode(node, contacted=contacted)
+ if (old and old.id != self.node.id and
+ (datetime.now() - old.lastSeen) >
+ timedelta(seconds=self.config['MIN_PING_INTERVAL'])):
+
+ def _staleNodeHandler(err, oldnode = old, newnode = node):
+ """The pinged node never responded, so replace it."""
+ self.table.replaceStaleNode(oldnode, newnode)
+
+ def _notStaleNodeHandler(dict, old=old):
+ """Got a pong from the old node, so update it."""
+ dict = dict['rsp']
+ if dict['id'] == old.id:
+ self.table.justSeenNode(old.id)
+
+ # Bucket is full, check to see if old node is still available
+ df = old.ping(self.node.id)
+ df.addCallbacks(_notStaleNodeHandler, _staleNodeHandler)
+
+ def sendJoin(self, node, callback=None, errback=None):
+ """Join the DHT by pinging a bootstrap node.
+
+ @type node: L{node.Node}
+ @param node: the node to send the join to
+ @type callback: C{method}
+ @param callback: the method to call with the results, it must take 1
+ parameter, the contact info returned by the node
+ (optional, defaults to doing nothing with the results)
+ @type errback: C{method}
+ @param errback: the method to call if an error occurs
+ (optional, defaults to calling the callback with None)
+ """
+
+ def _pongHandler(dict, node=node, self=self, callback=callback):
+ """Node responded properly, callback with response."""
+ n = self.Node(dict['rsp']['id'], dict['_krpc_sender'][0], dict['_krpc_sender'][1])
+ self.insertNode(n)
+ if callback:
+ callback((dict['rsp']['ip_addr'], dict['rsp']['port']))
+
+ def _defaultPong(err, node=node, table=self.table, callback=callback, errback=errback):
+ """Error occurred, fail node and errback or callback with error."""
+ table.nodeFailed(node)
+ if errback:
+ errback()
+ elif callback:
+ callback(None)
+
+ df = node.join(self.node.id)
+ df.addCallbacks(_pongHandler, _defaultPong)
+
+ def findCloseNodes(self, callback=lambda a: None, errback = None):
+ """Perform a findNode on the ID one away from our own.
+
+ This will allow us to populate our table with nodes on our network
+ closest to our own. This is called as soon as we start up with an
+ empty table.
+
+ @type callback: C{method}
+ @param callback: the method to call with the results, it must take 1
+ parameter, the list of K closest nodes
+ (optional, defaults to doing nothing with the results)
+ @type errback: C{method}
+ @param errback: the method to call if an error occurs
+ (optional, defaults to doing nothing when an error occurs)
+ """
+ id = self.node.id[:-1] + chr((ord(self.node.id[-1]) + 1) % 256)
+ self.findNode(id, callback, errback)
+
+ def refreshTable(self, force = False):
+ """Check all the buckets for those that need refreshing.
+
+ @param force: refresh all buckets regardless of last bucket access time
+ (optional, defaults to False)
+ """
+ def callback(nodes):
+ pass
+
+ for bucket in self.table.buckets:
+ if force or (datetime.now() - bucket.lastAccessed >
+ timedelta(seconds=self.config['BUCKET_STALENESS'])):
+ # Choose a random ID in the bucket and try and find it
+ id = newIDInRange(bucket.min, bucket.max)
+ self.findNode(id, callback)
+
+ def stats(self):
+ """Collect some statistics about the DHT.
+
+ @rtype: (C{int}, C{int})
+ @return: the number of contacts in our routing table, and the estimated
+ number of nodes in the entire DHT
+ """
+ num_contacts = reduce(lambda a, b: a + len(b.l), self.table.buckets, 0)
+ num_nodes = self.config['K'] * (2**(len(self.table.buckets) - 1))
+ return (num_contacts, num_nodes)
+
+ def shutdown(self):
+ """Closes the port and cancels pending later calls."""
+ self.listenport.stopListening()
+ try:
+ self.next_checkpoint.cancel()
+ except Exception:
+ # harmless if the call has already run or been cancelled
+ pass
+ self.store.close()
+
+ #{ Remote interface
+ def krpc_ping(self, id, _krpc_sender):
+ """Pong with our ID.
+
+ @type id: C{string}
+ @param id: the node ID of the sender node
+ @type _krpc_sender: (C{string}, C{int})
+ @param _krpc_sender: the sender node's IP address and port
+ """
+ n = self.Node(id, _krpc_sender[0], _krpc_sender[1])
+ self.insertNode(n, contacted = False)
+
+ return {"id" : self.node.id}
+
+ def krpc_join(self, id, _krpc_sender):
+ """Add the node by responding with its address and port.
+
+ @type id: C{string}
+ @param id: the node ID of the sender node
+ @type _krpc_sender: (C{string}, C{int})
+ @param _krpc_sender: the sender node's IP address and port
+ """
+ n = self.Node(id, _krpc_sender[0], _krpc_sender[1])
+ self.insertNode(n, contacted = False)
+
+ return {"ip_addr" : _krpc_sender[0], "port" : _krpc_sender[1], "id" : self.node.id}
+
+ def krpc_find_node(self, target, id, _krpc_sender):
+ """Find the K closest nodes to the target in the local routing table.
+
+ @type target: C{string}
+ @param target: the target ID to find nodes for
+ @type id: C{string}
+ @param id: the node ID of the sender node
+ @type _krpc_sender: (C{string}, C{int})
+ @param _krpc_sender: the sender node's IP address and port
+ """
+ n = self.Node(id, _krpc_sender[0], _krpc_sender[1])
+ self.insertNode(n, contacted = False)
+
+ nodes = self.table.findNodes(target)
+ nodes = map(lambda node: node.contactInfo(), nodes)
+ token = sha(self.token_secrets[0] + _krpc_sender[0]).digest()
+ return {"nodes" : nodes, "token" : token, "id" : self.node.id}
+
+
+class KhashmirRead(KhashmirBase):
+ """The read-only Khashmir class, which can only retrieve (not store) key/value mappings."""
+
+ _Node = KNodeRead
+
+ #{ Local interface
+ def findValue(self, key, callback, errback=None):
+ """Get the nodes that have values for the key from the global table.
+
+ @type key: C{string}
+ @param key: the target key to find the values for
+ @type callback: C{method}
+ @param callback: the method to call with the results, it must take 1
+ parameter, the list of nodes with values
+ @type errback: C{method}
+ @param errback: the method to call if an error occurs
+ (optional, defaults to doing nothing when an error occurs)
+ """
+ # Get K nodes out of local table/cache
+ nodes = self.table.findNodes(key)
+ d = Deferred()
+ if errback:
+ d.addCallbacks(callback, errback)
+ else:
+ d.addCallback(callback)
+
+ # Search for others starting with the locally found ones
+ state = FindValue(self, key, d.callback, self.config)
+ reactor.callLater(0, state.goWithNodes, nodes)
+
+ def valueForKey(self, key, callback, searchlocal = True):
+ """Get the values found for key in global table.
+
+ The callback will be called once for each peer that returns previously
+ unseen values, with the list of those values. The final call passes an
+ empty list to signal completion.
+
+ @type key: C{string}
+ @param key: the target key to get the values for
+ @type callback: C{method}
+ @param callback: the method to call with the results, it must take 2
+ parameters: the key, and the values found
+ @type searchlocal: C{boolean}
+ @param searchlocal: whether to also look for any local values
+ """
+ # Get any local values
+ if searchlocal:
+ l = self.store.retrieveValues(key)
+ if len(l) > 0:
+ reactor.callLater(0, callback, key, l)
+ else:
+ l = []
+
+ def _getValueForKey(nodes, key=key, local_values=l, response=callback, self=self):
+ """Use the found nodes to send requests for values to."""
+ state = GetValue(self, key, local_values, self.config['RETRIEVE_VALUES'], response, self.config)
+ reactor.callLater(0, state.goWithNodes, nodes)
+
+ # First lookup nodes that have values for the key
+ self.findValue(key, _getValueForKey)
+
+ #{ Remote interface
+ def krpc_find_value(self, key, id, _krpc_sender):
+ """Find the number of values stored locally for the key, and the K closest nodes.
+
+ @type key: C{string}
+ @param key: the target key to find the values and nodes for
+ @type id: C{string}
+ @param id: the node ID of the sender node
+ @type _krpc_sender: (C{string}, C{int})
+ @param _krpc_sender: the sender node's IP address and port
+ """
+ n = self.Node(id, _krpc_sender[0], _krpc_sender[1])
+ self.insertNode(n, contacted = False)
+
+ nodes = self.table.findNodes(key)
+ nodes = map(lambda node: node.contactInfo(), nodes)
+ num_values = self.store.countValues(key)
+ return {'nodes' : nodes, 'num' : num_values, "id": self.node.id}
+
+ def krpc_get_value(self, key, num, id, _krpc_sender):
+ """Retrieve the values stored locally for the key.
+
+ @type key: C{string}
+ @param key: the target key to retrieve the values for
+ @type num: C{int}
+ @param num: the maximum number of values to retrieve, or 0 to
+ retrieve all of them
+ @type id: C{string}
+ @param id: the node ID of the sender node
+ @type _krpc_sender: (C{string}, C{int})
+ @param _krpc_sender: the sender node's IP address and port
+ """
+ n = self.Node(id, _krpc_sender[0], _krpc_sender[1])
+ self.insertNode(n, contacted = False)
+
+ l = self.store.retrieveValues(key)
+ if num == 0 or num >= len(l):
+ return {'values' : l, "id": self.node.id}
+ else:
+ shuffle(l)
+ return {'values' : l[:num], "id": self.node.id}
+
+
+class KhashmirWrite(KhashmirRead):
+ """The read-write Khashmir class, which can store and retrieve key/value mappings."""
+
+ _Node = KNodeWrite
+
+ #{ Local interface
+ def storeValueForKey(self, key, value, callback=None):
+ """Stores the value for the key in the global table.
+
+ This implementation provides no store status: peers respond, but do
+ not indicate whether the value was actually stored.
+
+ @type key: C{string}
+ @param key: the target key to store the value for
+ @type value: C{string}
+ @param value: the value to store with the key
+ @type callback: C{method}
+ @param callback: the method to call with the results, it must take 3
+ parameters: the key, the value stored, and the result of the store
+ (optional, defaults to doing nothing with the results)
+ """
+ def _storeValueForKey(nodes, key=key, value=value, response=callback, self=self):
+ """Use the returned K closest nodes to store the key at."""
+ if not response:
+ def _storedValueHandler(key, value, sender):
+ """Default callback that does nothing."""
+ pass
+ response = _storedValueHandler
+ action = StoreValue(self, key, value, self.config['STORE_REDUNDANCY'], response, self.config)
+ reactor.callLater(0, action.goWithNodes, nodes)
+
+ # First find the K closest nodes to operate on.
+ self.findNode(key, _storeValueForKey)
+
+ #{ Remote interface
+ def krpc_store_value(self, key, value, token, id, _krpc_sender):
+ """Store the value locally with the key.
+
+ @type key: C{string}
+ @param key: the target key to store the value for
+ @type value: C{string}
+ @param value: the value to store with the key
+ @param token: the token to confirm that this peer contacted us previously
+ @type id: C{string}
+ @param id: the node ID of the sender node
+ @type _krpc_sender: (C{string}, C{int})
+ @param _krpc_sender: the sender node's IP address and port
+ """
+ n = self.Node(id, _krpc_sender[0], _krpc_sender[1])
+ self.insertNode(n, contacted = False)
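+ # The token must match one derived from a current secret and the
+ # sender's IP, as handed out by krpc_find_node; secrets are rotated by
+ # checkpoint() (at most three are kept), so recent tokens stay valid.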
+ for secret in self.token_secrets:
+ this_token = sha(secret + _krpc_sender[0]).digest()
+ if token == this_token:
+ self.store.storeValue(key, value)
+ return {"id" : self.node.id}
+ raise krpc.KrpcError, (krpc.KRPC_ERROR_INVALID_TOKEN, 'token is invalid, do a find_nodes to get a fresh one')
+
+
+class Khashmir(KhashmirWrite):
+ """The default Khashmir class (currently the read-write L{KhashmirWrite})."""
+ _Node = KNodeWrite
+
+
+class SimpleTests(unittest.TestCase):
+
+ timeout = 10
+ DHT_DEFAULTS = {'PORT': 9977, 'K': 8, 'HASH_LENGTH': 160,
+ 'CHECKPOINT_INTERVAL': 300, 'CONCURRENT_REQS': 4,
+ 'STORE_REDUNDANCY': 3, 'RETRIEVE_VALUES': -10000,
+ 'MAX_FAILURES': 3,
+ 'MIN_PING_INTERVAL': 900,'BUCKET_STALENESS': 3600,
+ 'KEY_EXPIRE': 3600, 'SPEW': False, }
+
+ def setUp(self):
+ d = self.DHT_DEFAULTS.copy()
+ d['PORT'] = 4044
+ self.a = Khashmir(d)
+ d = self.DHT_DEFAULTS.copy()
+ d['PORT'] = 4045
+ self.b = Khashmir(d)
+
+ def tearDown(self):
+ self.a.shutdown()
+ self.b.shutdown()
+ os.unlink(self.a.store.db)
+ os.unlink(self.b.store.db)
+
+ def testAddContact(self):
+ self.failUnlessEqual(len(self.a.table.buckets), 1)
+ self.failUnlessEqual(len(self.a.table.buckets[0].l), 0)
+
+ self.failUnlessEqual(len(self.b.table.buckets), 1)
+ self.failUnlessEqual(len(self.b.table.buckets[0].l), 0)
+
+ self.a.addContact('127.0.0.1', 4045)
+ reactor.iterate()
+ reactor.iterate()
+ reactor.iterate()
+ reactor.iterate()
+
+ self.failUnlessEqual(len(self.a.table.buckets), 1)
+ self.failUnlessEqual(len(self.a.table.buckets[0].l), 1)
+ self.failUnlessEqual(len(self.b.table.buckets), 1)
+ self.failUnlessEqual(len(self.b.table.buckets[0].l), 1)
+
+ def testStoreRetrieve(self):
+ self.a.addContact('127.0.0.1', 4045)
+ reactor.iterate()
+ reactor.iterate()
+ reactor.iterate()
+ reactor.iterate()
+ self.got = 0
+ self.a.storeValueForKey(sha('foo').digest(), 'foobar')
+ reactor.iterate()
+ reactor.iterate()
+ reactor.iterate()
+ reactor.iterate()
+ reactor.iterate()
+ reactor.iterate()
+ self.a.valueForKey(sha('foo').digest(), self._cb)
+ reactor.iterate()
+ reactor.iterate()
+ reactor.iterate()
+ reactor.iterate()
+ reactor.iterate()
+ reactor.iterate()
+ reactor.iterate()
+
+ def _cb(self, key, val):
+ if not val:
+ self.failUnlessEqual(self.got, 1)
+ elif 'foobar' in val:
+ self.got = 1
+
+
+class MultiTest(unittest.TestCase):
+
+ timeout = 30
+ num = 20
+ DHT_DEFAULTS = {'PORT': 9977, 'K': 8, 'HASH_LENGTH': 160,
+ 'CHECKPOINT_INTERVAL': 300, 'CONCURRENT_REQS': 4,
+ 'STORE_REDUNDANCY': 3, 'RETRIEVE_VALUES': -10000,
+ 'MAX_FAILURES': 3,
+ 'MIN_PING_INTERVAL': 900,'BUCKET_STALENESS': 3600,
+ 'KEY_EXPIRE': 3600, 'SPEW': False, }
+
+ def _done(self, val):
+ self.done = 1
+
+ def setUp(self):
+ self.l = []
+ self.startport = 4088
+ for i in range(self.num):
+ d = self.DHT_DEFAULTS.copy()
+ d['PORT'] = self.startport + i
+ self.l.append(Khashmir(d))
+ reactor.iterate()
+ reactor.iterate()
+
+ for i in self.l:
+ i.addContact('127.0.0.1', self.l[randrange(0,self.num)].port)
+ i.addContact('127.0.0.1', self.l[randrange(0,self.num)].port)
+ i.addContact('127.0.0.1', self.l[randrange(0,self.num)].port)
+ reactor.iterate()
+ reactor.iterate()
+ reactor.iterate()
+
+ for i in self.l:
+ self.done = 0
+ i.findCloseNodes(self._done)
+ while not self.done:
+ reactor.iterate()
+ for i in self.l:
+ self.done = 0
+ i.findCloseNodes(self._done)
+ while not self.done:
+ reactor.iterate()
+
+ def tearDown(self):
+ for i in self.l:
+ i.shutdown()
+ os.unlink(i.store.db)
+
+ reactor.iterate()
+
+ def testStoreRetrieve(self):
+ for i in range(10):
+ K = newID()
+ V = newID()
+
+ for a in range(3):
+ self.done = 0
+ def _scb(key, value, result):
+ self.done = 1
+ self.l[randrange(0, self.num)].storeValueForKey(K, V, _scb)
+ while not self.done:
+ reactor.iterate()
+
+
+ def _rcb(key, val):
+ if not val:
+ self.done = 1
+ self.failUnlessEqual(self.got, 1)
+ elif V in val:
+ self.got = 1
+ for x in range(3):
+ self.got = 0
+ self.done = 0
+ self.l[randrange(0, self.num)].valueForKey(K, _rcb)
+ while not self.done:
+ reactor.iterate()
--- /dev/null
+## Copyright 2002-2004 Andrew Loewenstern, All Rights Reserved
+# see LICENSE.txt for license information
+
+"""Represents a khashmir node in the DHT."""
+
+from twisted.python import log
+
+from node import Node, NULL_ID
+
+class KNodeBase(Node):
+ """A basic node that can only be pinged and help find other nodes."""
+
+ def checkSender(self, dict):
+ """Check the sender's info to make sure it meets expectations."""
+ try:
+ senderid = dict['rsp']['id']
+ except KeyError:
+ log.msg("No peer id in response")
+ raise Exception, "No peer id in response."
+ else:
+ if self.id != NULL_ID and senderid != self.id:
+ log.msg("Got response from different node than expected.")
+ self.table.invalidateNode(self)
+
+ return dict
+
+ def errBack(self, err):
+ """Log an error that has occurred."""
+ log.err(err)
+ return err
+
+ def ping(self, id):
+ """Ping the node."""
+ df = self.conn.sendRequest('ping', {"id":id})
+ df.addErrback(self.errBack)
+ df.addCallback(self.checkSender)
+ return df
+
+ def join(self, id):
+ """Use the node to bootstrap into the system."""
+ df = self.conn.sendRequest('join', {"id":id})
+ df.addErrback(self.errBack)
+ df.addCallback(self.checkSender)
+ return df
+
+ def findNode(self, id, target):
+ """Request the nearest nodes to the target that the node knows about."""
+ df = self.conn.sendRequest('find_node', {"target" : target, "id": id})
+ df.addErrback(self.errBack)
+ df.addCallback(self.checkSender)
+ return df
+
+class KNodeRead(KNodeBase):
+ """More advanced node that can also find and send values."""
+
+ def findValue(self, id, key):
+ """Request the nearest nodes to the key that the node knows about."""
+ df = self.conn.sendRequest('find_value', {"key" : key, "id" : id})
+ df.addErrback(self.errBack)
+ df.addCallback(self.checkSender)
+ return df
+
+ def getValue(self, id, key, num):
+ """Request the values that the node has for the key."""
+ df = self.conn.sendRequest('get_value', {"key" : key, "num": num, "id" : id})
+ df.addErrback(self.errBack)
+ df.addCallback(self.checkSender)
+ return df
+
+class KNodeWrite(KNodeRead):
+ """Most advanced node that can also store values."""
+
+ def storeValue(self, id, key, value, token):
+ """Store a value in the node."""
+ df = self.conn.sendRequest('store_value', {"key" : key, "value" : value, "token" : token, "id": id})
+ df.addErrback(self.errBack)
+ df.addCallback(self.checkSender)
+ return df
--- /dev/null
+## Copyright 2002-2003 Andrew Loewenstern, All Rights Reserved
+# see LICENSE.txt for license information
+
+"""The KRPC communication protocol implementation.
+
+@var KRPC_TIMEOUT: the number of seconds after which requests timeout
+@var UDP_PACKET_LIMIT: the maximum number of bytes that can be sent in a
+ UDP packet without fragmentation
+
+@var KRPC_ERROR: the code for a generic error
+@var KRPC_ERROR_SERVER_ERROR: the code for a server error
+@var KRPC_ERROR_MALFORMED_PACKET: the code for a malformed packet error
+@var KRPC_ERROR_METHOD_UNKNOWN: the code for a method unknown error
+@var KRPC_ERROR_MALFORMED_REQUEST: the code for a malformed request error
+@var KRPC_ERROR_INVALID_TOKEN: the code for an invalid token error
+@var KRPC_ERROR_RESPONSE_TOO_LONG: the code for a response too long error
+
+@var KRPC_ERROR_INTERNAL: the code for an internal error
+@var KRPC_ERROR_RECEIVED_UNKNOWN: the code for an unknown message type error
+@var KRPC_ERROR_TIMEOUT: the code for a timeout error
+@var KRPC_ERROR_PROTOCOL_STOPPED: the code for a stopped protocol error
+
+@var TID: the identifier for the transaction ID
+@var REQ: the identifier for a request packet
+@var RSP: the identifier for a response packet
+@var TYP: the identifier for the type of packet
+@var ARG: the identifier for the argument to the request
+@var ERR: the identifier for an error packet
+
+@group Remote node error codes: KRPC_ERROR, KRPC_ERROR_SERVER_ERROR,
+ KRPC_ERROR_MALFORMED_PACKET, KRPC_ERROR_METHOD_UNKNOWN,
+ KRPC_ERROR_MALFORMED_REQUEST, KRPC_ERROR_INVALID_TOKEN,
+ KRPC_ERROR_RESPONSE_TOO_LONG
+@group Local node error codes: KRPC_ERROR_INTERNAL, KRPC_ERROR_RECEIVED_UNKNOWN,
+ KRPC_ERROR_TIMEOUT, KRPC_ERROR_PROTOCOL_STOPPED
+@group Command identifiers: TID, REQ, RSP, TYP, ARG, ERR
+
+"""
+
+from bencode import bencode, bdecode
+from time import asctime
+from math import ceil
+
+from twisted.internet.defer import Deferred
+from twisted.internet import protocol, reactor
+from twisted.python import log
+from twisted.trial import unittest
+
+from khash import newID
+
+KRPC_TIMEOUT = 20
+UDP_PACKET_LIMIT = 1472
+
+# Remote node errors
+KRPC_ERROR = 200
+KRPC_ERROR_SERVER_ERROR = 201
+KRPC_ERROR_MALFORMED_PACKET = 202
+KRPC_ERROR_METHOD_UNKNOWN = 203
+KRPC_ERROR_MALFORMED_REQUEST = 204
+KRPC_ERROR_INVALID_TOKEN = 205
+KRPC_ERROR_RESPONSE_TOO_LONG = 206
+
+# Local errors
+KRPC_ERROR_INTERNAL = 100
+KRPC_ERROR_RECEIVED_UNKNOWN = 101
+KRPC_ERROR_TIMEOUT = 102
+KRPC_ERROR_PROTOCOL_STOPPED = 103
+
+# commands
+TID = 't'
+REQ = 'q'
+RSP = 'r'
+TYP = 'y'
+ARG = 'a'
+ERR = 'e'
+
+class KrpcError(Exception):
+ """An error occurred in the KRPC protocol."""
+ pass
+
+def verifyMessage(msg):
+ """Check received message for corruption and errors.
+
+ @type msg: C{dictionary}
+ @param msg: the dictionary of information received on the connection
+ @raise KrpcError: if the message is corrupt
+ """
+
+ if type(msg) != dict:
+ raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "not a dictionary")
+ if TYP not in msg:
+ raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "no message type")
+ if msg[TYP] == REQ:
+ if REQ not in msg:
+ raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "request type not specified")
+ if type(msg[REQ]) != str:
+ raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "request type is not a string")
+ if ARG not in msg:
+ raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "no arguments for request")
+ if type(msg[ARG]) != dict:
+ raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "arguments for request are not in a dictionary")
+ elif msg[TYP] == RSP:
+ if RSP not in msg:
+ raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "response not specified")
+ if type(msg[RSP]) != dict:
+ raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "response is not a dictionary")
+ elif msg[TYP] == ERR:
+ if ERR not in msg:
+ raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "error not specified")
+ if type(msg[ERR]) != list:
+ raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "error is not a list")
+ if len(msg[ERR]) != 2:
+ raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "error is not a 2-element list")
+ if type(msg[ERR][0]) not in (int, long):
+ raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "error number is not a number")
+ if type(msg[ERR][1]) != str:
+ raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "error string is not a string")
+# else:
+# raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "unknown message type")
+ if TID not in msg:
+ raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "no transaction ID specified")
+ if type(msg[TID]) != str:
+ raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "transaction id is not a string")
+
+class hostbroker(protocol.DatagramProtocol):
+ """The factory for the KRPC protocol.
+
+ @type server: L{khashmir.Khashmir}
+ @ivar server: the main Khashmir program
+ @type config: C{dictionary}
+ @ivar config: the configuration parameters for the DHT
+ @type connections: C{dictionary}
+ @ivar connections: all the connections that have ever been made to the
+ protocol, keys are IP address and port pairs, values are L{KRPC}
+ protocols for the addresses
+ @ivar protocol: the protocol to use to handle incoming connections
+ (added externally)
+ @type addr: (C{string}, C{int})
+ @ivar addr: the IP address and port of this node
+ """
+
+ def __init__(self, server, config):
+ """Initialize the factory.
+
+ @type server: L{khashmir.Khashmir}
+ @param server: the main DHT program
+ @type config: C{dictionary}
+ @param config: the configuration parameters for the DHT
+ """
+ self.server = server
+ self.config = config
+ # this should be changed to storage that drops old entries
+ self.connections = {}
+
+ def datagramReceived(self, datagram, addr):
+ """Optionally create a new protocol object, and handle the new datagram.
+
+ @type datagram: C{string}
+ @param datagram: the data received from the transport.
+ @type addr: (C{string}, C{int})
+ @param addr: source IP address and port of datagram.
+ """
+ c = self.connectionForAddr(addr)
+ c.datagramReceived(datagram, addr)
+ #if c.idle():
+ # del self.connections[addr]
+
+ def connectionForAddr(self, addr):
+ """Get a protocol object for the source.
+
+ @type addr: (C{string}, C{int})
+ @param addr: source IP address and port of datagram.
+ """
+ # Don't connect to ourself
+ if addr == self.addr:
+ raise KrpcError, (KRPC_ERROR_INTERNAL, "attempted to open a connection to ourself")
+
+ # Create a new protocol object if necessary
+ if not self.connections.has_key(addr):
+ conn = self.protocol(addr, self.server, self.transport, self.config['SPEW'])
+ self.connections[addr] = conn
+ else:
+ conn = self.connections[addr]
+ return conn
+
+ def makeConnection(self, transport):
+ """Make a connection to a transport and save our address."""
+ protocol.DatagramProtocol.makeConnection(self, transport)
+ tup = transport.getHost()
+ self.addr = (tup.host, tup.port)
+
+ def stopProtocol(self):
+ """Stop all the open connections."""
+ for conn in self.connections.values():
+ conn.stop()
+ protocol.DatagramProtocol.stopProtocol(self)
+
+class KRPC:
+ """The KRPC protocol implementation.
+
+ @ivar transport: the transport to use for the protocol
+ @type factory: L{khashmir.Khashmir}
+ @ivar factory: the main Khashmir program
+ @type addr: (C{string}, C{int})
+ @ivar addr: the IP address and port of the source node
+ @type noisy: C{boolean}
+ @ivar noisy: whether to log additional details of the protocol
+ @type tids: C{dictionary}
+ @ivar tids: the transaction IDs outstanding for requests, keys are the
+ transaction ID of the request, values are the deferreds to call with
+ the results
+ @type stopped: C{boolean}
+ @ivar stopped: whether the protocol has been stopped
+ """
+
+ def __init__(self, addr, server, transport, spew = False):
+ """Initialize the protocol.
+
+ @type addr: (C{string}, C{int})
+ @param addr: the IP address and port of the source node
+ @type server: L{khashmir.Khashmir}
+ @param server: the main Khashmir program
+ @param transport: the transport to use for the protocol
+ @type spew: C{boolean}
+ @param spew: whether to log additional details of the protocol
+ (optional, defaults to False)
+ """
+ self.transport = transport
+ self.factory = server
+ self.addr = addr
+ self.noisy = spew
+ self.tids = {}
+ self.stopped = False
+
+ def datagramReceived(self, data, addr):
+ """Process the new datagram.
+
+ @type data: C{string}
+ @param data: the data received from the transport.
+ @type addr: (C{string}, C{int})
+ @param addr: source IP address and port of datagram.
+ """
+ if self.stopped:
+ if self.noisy:
+ log.msg("stopped, dropping message from %r: %s" % (addr, data))
+ return
+
+ # Bdecode the message
+ try:
+ msg = bdecode(data)
+ except Exception, e:
+ if self.noisy:
+ log.msg("krpc bdecode error: ")
+ log.err(e)
+ return
+
+ # Make sure the remote node isn't trying anything funny
+ try:
+ verifyMessage(msg)
+ except Exception, e:
+ log.msg("krpc message verification error: ")
+ log.err(e)
+ return
+
+ if self.noisy:
+ log.msg("%d received from %r: %s" % (self.factory.port, addr, msg))
+
+ # Process it based on its type
+ if msg[TYP] == REQ:
+ ilen = len(data)
+
+ # Requests are handled by the factory
+ f = getattr(self.factory, "krpc_" + msg[REQ], None)
+ msg[ARG]['_krpc_sender'] = self.addr
+ if f and callable(f):
+ try:
+ ret = f(*(), **msg[ARG])
+ except KrpcError, e:
+ log.msg('Got a Krpc error while running: krpc_%s' % msg[REQ])
+ log.err(e)
+ olen = self._sendResponse(addr, msg[TID], ERR, [e[0], e[1]])
+ except TypeError, e:
+ log.msg('Got a malformed request for: krpc_%s' % msg[REQ])
+ log.err(e)
+ olen = self._sendResponse(addr, msg[TID], ERR,
+ [KRPC_ERROR_MALFORMED_REQUEST, str(e)])
+ except Exception, e:
+ log.msg('Got an unknown error while running: krpc_%s' % msg[REQ])
+ log.err(e)
+ olen = self._sendResponse(addr, msg[TID], ERR,
+ [KRPC_ERROR_SERVER_ERROR, str(e)])
+ else:
+ olen = self._sendResponse(addr, msg[TID], RSP, ret)
+ else:
+ # Request for unknown method
+ log.msg("ERROR: don't know about method %s" % msg[REQ])
+ olen = self._sendResponse(addr, msg[TID], ERR,
+ [KRPC_ERROR_METHOD_UNKNOWN, "unknown method "+str(msg[REQ])])
+ if self.noisy:
+ log.msg("%s >>> %s - %s %s %s" % (addr, self.factory.node.port,
+ ilen, msg[REQ], olen))
+ elif msg[TYP] == RSP:
+ # Responses get processed by their TID's deferred
+ if self.tids.has_key(msg[TID]):
+ df = self.tids[msg[TID]]
+ # callback
+ del(self.tids[msg[TID]])
+ df.callback({'rsp' : msg[RSP], '_krpc_sender': addr})
+ else:
+ # no tid, this transaction timed out already...
+ if self.noisy:
+ log.msg('timeout: %r' % msg[RSP]['id'])
+ elif msg[TYP] == ERR:
+ # Errors get processed by their TID's deferred's errback
+ if self.tids.has_key(msg[TID]):
+ df = self.tids[msg[TID]]
+ del(self.tids[msg[TID]])
+ # callback
+ df.errback(KrpcError(*msg[ERR]))
+ else:
+ # day late and dollar short, just log it
+ log.msg("Got an error for an unknown request: %r" % (msg[ERR], ))
+ pass
+ else:
+ # Received an unknown message type
+ if self.noisy:
+ log.msg("unknown message type: %r" % msg)
+ if msg[TID] in self.tids:
+ df = self.tids[msg[TID]]
+ del(self.tids[msg[TID]])
+ # callback
+ df.errback(KrpcError(KRPC_ERROR_RECEIVED_UNKNOWN,
+ "Received an unknown message type: %r" % msg[TYP]))
+
+ def _sendResponse(self, addr, tid, msgType, response):
+ """Helper function for sending responses to nodes.
+
+ @type addr: (C{string}, C{int})
+ @param addr: source IP address and port of datagram.
+ @param tid: the transaction ID of the request
+ @param msgType: the type of message to respond with
+ @param response: the arguments for the response
+ """
+ if not response:
+ response = {}
+
+ try:
+ # Create the response message
+ msg = {TID : tid, TYP : msgType, msgType : response}
+
+ if self.noisy:
+ log.msg("%d responding to %r: %s" % (self.factory.port, addr, msg))
+
+ out = bencode(msg)
+
+ # Make sure its not too long
+ if len(out) > UDP_PACKET_LIMIT:
+ # Can we remove some values to shorten it?
+ if 'values' in response:
+ # Save the original list of values
+ orig_values = response['values']
+ len_orig_values = len(bencode(orig_values))
+
+ # Calculate the maximum value length possible
+ max_len_values = len_orig_values - (len(out) - UDP_PACKET_LIMIT)
+ assert max_len_values > 0
+
+ # Start with a calculation of how many values should be included
+ # (assumes all values are the same length)
+ per_value = (float(len_orig_values) - 2.0) / float(len(orig_values))
+ num_values = len(orig_values) - int(ceil(float(len(out) - UDP_PACKET_LIMIT) / per_value))
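+ # Worked example: 2000 one-byte values bencode to 2 + 2000 * 3 = 6002
+ # bytes ('1:x' per value plus the list's 'l'/'e'), so per_value is 3.0
+ # and the estimate drops num_values by the overflow divided by 3.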
+
+ # Do a linear search for the actual maximum number possible
+ bencoded_values = len(bencode(orig_values[:num_values]))
+ while bencoded_values < max_len_values and num_values + 1 < len(orig_values):
+ bencoded_values += len(bencode(orig_values[num_values]))
+ num_values += 1
+ while bencoded_values > max_len_values and num_values > 0:
+ num_values -= 1
+ bencoded_values -= len(bencode(orig_values[num_values]))
+ assert num_values > 0
+
+ # Encode the result
+ response['values'] = orig_values[:num_values]
+ out = bencode(msg)
+ assert len(out) < UDP_PACKET_LIMIT
+ log.msg('Shortened a long packet from %d to %d values, new packet length: %d' %
+ (len(orig_values), num_values, len(out)))
+ else:
+ # Too long a response, send an error
+ log.msg('Could not send response, too long: %d bytes' % len(out))
+ msg = {TID : tid, TYP : ERR, ERR : [KRPC_ERROR_RESPONSE_TOO_LONG, "response was %d bytes" % len(out)]}
+ out = bencode(msg)
+
+ except Exception, e:
+ # Unknown error, send an error message
+ msg = {TID : tid, TYP : ERR, ERR : [KRPC_ERROR_SERVER_ERROR, "unknown error sending response: %s" % str(e)]}
+ out = bencode(msg)
+
+ self.transport.write(out, addr)
+ return len(out)
+
+ def sendRequest(self, method, args):
+ """Send a request to the remote node.
+
+ @type method: C{string}
+ @param method: the method name to call on the remote node
+ @param args: the arguments to send to the remote node's method
+ """
+ if self.stopped:
+ raise KrpcError, (KRPC_ERROR_PROTOCOL_STOPPED, "cannot send, connection has been stopped")
+
+ # Create the request message
+ msg = {TID : newID(), TYP : REQ, REQ : method, ARG : args}
+ if self.noisy:
+ log.msg("%d sending to %r: %s" % (self.factory.port, self.addr, msg))
+ data = bencode(msg)
+
+ # Create the deferred and save it with the TID
+ d = Deferred()
+ self.tids[msg[TID]] = d
+
+ # Schedule a later timeout call
+ def timeOut(tids = self.tids, id = msg[TID], method = method, addr = self.addr):
+ """Call the deferred's errback if a timeout occurs."""
+ if tids.has_key(id):
+ df = tids[id]
+ del(tids[id])
+ df.errback(KrpcError(KRPC_ERROR_TIMEOUT, "timeout waiting for '%s' from %r" % (method, addr)))
+ later = reactor.callLater(KRPC_TIMEOUT, timeOut)
+
+ # Cancel the timeout call if a response is received
+ def dropTimeOut(dict, later_call = later):
+ """Cancel the timeout call when a response is received."""
+ if later_call.active():
+ later_call.cancel()
+ return dict
+ d.addBoth(dropTimeOut)
+
+ self.transport.write(data, self.addr)
+ return d
+
+ def stop(self):
+ """Timeout all pending requests."""
+ for df in self.tids.values():
+ df.errback(KrpcError(KRPC_ERROR_PROTOCOL_STOPPED, 'connection has been stopped while waiting for response'))
+ self.tids = {}
+ self.stopped = True
+
+#{ For testing the KRPC protocol
+def connectionForAddr(host, port):
+ return host
+
+class Receiver(protocol.Factory):
+ protocol = KRPC
+ def __init__(self):
+ self.buf = []
+ def krpc_store(self, msg, _krpc_sender):
+ self.buf += [msg]
+ return {}
+ def krpc_echo(self, msg, _krpc_sender):
+ return {'msg': msg}
+ def krpc_values(self, length, num, _krpc_sender):
+ return {'values': ['1'*length]*num}
+
+def make(port):
+ af = Receiver()
+ a = hostbroker(af, {'SPEW': False})
+ a.protocol = KRPC
+ p = reactor.listenUDP(port, a)
+ return af, a, p
+
+class KRPCTests(unittest.TestCase):
+ timeout = 2
+
+ def setUp(self):
+ self.af, self.a, self.ap = make(1180)
+ self.bf, self.b, self.bp = make(1181)
+
+ def tearDown(self):
+ self.ap.stopListening()
+ self.bp.stopListening()
+
+ def bufEquals(self, result, value):
+ self.failUnlessEqual(self.bf.buf, value)
+
+ def testSimpleMessage(self):
+ d = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('store', {'msg' : "This is a test."})
+ d.addCallback(self.bufEquals, ["This is a test."])
+ return d
+
+ def testMessageBlast(self):
+ for i in range(100):
+ d = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('store', {'msg' : "This is a test."})
+ d.addCallback(self.bufEquals, ["This is a test."] * 100)
+ return d
+
+ def testEcho(self):
+ df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is a test."})
+ df.addCallback(self.gotMsg, "This is a test.")
+ return df
+
+ def gotMsg(self, dict, should_be):
+ _krpc_sender = dict['_krpc_sender']
+ msg = dict['rsp']
+ self.failUnlessEqual(msg['msg'], should_be)
+
+ def testManyEcho(self):
+ for i in xrange(100):
+ df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is a test."})
+ df.addCallback(self.gotMsg, "This is a test.")
+ return df
+
+ def testMultiEcho(self):
+ df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is a test."})
+ df.addCallback(self.gotMsg, "This is a test.")
+
+ df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is another test."})
+ df.addCallback(self.gotMsg, "This is another test.")
+
+ df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is yet another test."})
+ df.addCallback(self.gotMsg, "This is yet another test.")
+
+ return df
+
+ def testEchoReset(self):
+ df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is a test."})
+ df.addCallback(self.gotMsg, "This is a test.")
+
+ df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is another test."})
+ df.addCallback(self.gotMsg, "This is another test.")
+ df.addCallback(self.echoReset)
+ return df
+
+ def echoReset(self, dict):
+ del(self.a.connections[('127.0.0.1', 1181)])
+ df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is yet another test."})
+ df.addCallback(self.gotMsg, "This is yet another test.")
+ return df
+
+ def testUnknownMeth(self):
+ df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('blahblah', {'msg' : "This is a test."})
+ df.addBoth(self.gotErr, KRPC_ERROR_METHOD_UNKNOWN)
+ return df
+
+ def testMalformedRequest(self):
+ df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is a test.", 'foo': 'bar'})
+ df.addBoth(self.gotErr, KRPC_ERROR_MALFORMED_REQUEST)
+ return df
+
+ def gotErr(self, err, should_be):
+ self.failUnlessEqual(err.value[0], should_be)
+
+ def testLongPackets(self):
+ df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('values', {'length' : 1, 'num': 2000})
+ df.addCallback(self.gotLongRsp)
+ return df
+
+ def gotLongRsp(self, dict):
+ # Not quite accurate, but good enough
+ self.failUnless(len(bencode(dict))-10 < UDP_PACKET_LIMIT)
+
\ No newline at end of file
--- /dev/null
+## Copyright 2002-2003 Andrew Loewenstern, All Rights Reserved
+# see LICENSE.txt for license information
+
+"""The routing table and buckets for a kademlia-like DHT."""
+
+from datetime import datetime
+from bisect import bisect_left
+
+from twisted.python import log
+from twisted.trial import unittest
+
+import khash
+from node import Node, NULL_ID
+
+class KTable:
+ """Local routing table for a kademlia-like distributed hash table.
+
+ @type node: L{node.Node}
+ @ivar node: the local node
+ @type config: C{dictionary}
+ @ivar config: the configuration parameters for the DHT
+ @type buckets: C{list} of L{KBucket}
+ @ivar buckets: the buckets of nodes in the routing table
+ """
+
+ def __init__(self, node, config):
+ """Initialize the first empty bucket of everything.
+
+ @type node: L{node.Node}
+ @param node: the local node
+ @type config: C{dictionary}
+ @param config: the configuration parameters for the DHT
+ """
+ # this is the root node, a.k.a. US!
+ assert node.id != NULL_ID
+ self.node = node
+ self.config = config
+ self.buckets = [KBucket([], 0L, 2L**self.config['HASH_LENGTH'])]
+
+ def _bucketIndexForInt(self, num):
+ """Find the index of the bucket that should hold the node's ID number."""
+ return bisect_left(self.buckets, num)
+
+ def findNodes(self, id):
+ """Find the K nodes in our own local table closest to the ID.
+
+ @type id: C{string} of C{int} or L{node.Node}
+ @param id: the ID to find nodes that are close to
+ @raise TypeError: if id does not properly identify an ID
+ """
+
+ # Get the ID number from the input
+ if isinstance(id, str):
+ num = khash.intify(id)
+ elif isinstance(id, Node):
+ num = id.num
+ elif isinstance(id, int) or isinstance(id, long):
+ num = id
+ else:
+ raise TypeError, "findNodes requires an int, string, or Node"
+
+ nodes = []
+ i = self._bucketIndexForInt(num)
+
+ # If this node is already in our table then return it
+ try:
+ index = self.buckets[i].l.index(num)
+ except ValueError:
+ pass
+ else:
+ return [self.buckets[i].l[index]]
+
+ # Don't have the node, get the K closest nodes from the appropriate bucket
+ nodes = nodes + self.buckets[i].l
+
+ # Make sure we have enough
+ if len(nodes) < self.config['K']:
+ # Look in adjoining buckets for nodes
+ min = i - 1
+ max = i + 1
+ while len(nodes) < self.config['K'] and (min >= 0 or max < len(self.buckets)):
+ # Add the adjoining buckets' nodes to the list
+ if min >= 0:
+ nodes = nodes + self.buckets[min].l
+ if max < len(self.buckets):
+ nodes = nodes + self.buckets[max].l
+ min = min - 1
+ max = max + 1
+
+ # Sort the found nodes by proximity to the id and return the closest K
+ nodes.sort(lambda a, b, num=num: cmp(num ^ a.num, num ^ b.num))
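+ # (XOR metric example: for num = 0b100, a node numbered 0b101 lies at
+ # distance 1 and sorts ahead of one numbered 0b110 at distance 2.)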
+ return nodes[:self.config['K']]
+
+ def _splitBucket(self, a):
+ """Split a bucket in two.
+
+ @type a: L{KBucket}
+ @param a: the bucket to split
+ """
+ # Create a new bucket with half the (upper) range of the current bucket
+ diff = (a.max - a.min) / 2
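+ # e.g. the initial bucket covering [0, 2**160) splits into
+ # [0, 2**159) and [2**159, 2**160)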
+ b = KBucket([], a.max - diff, a.max)
+ self.buckets.insert(self.buckets.index(a.min) + 1, b)
+
+ # Reduce the input bucket's (upper) range
+ a.max = a.max - diff
+
+ # Transfer nodes to the new bucket
+ for anode in a.l[:]:
+ if anode.num >= a.max:
+ a.l.remove(anode)
+ b.l.append(anode)
+
+ def replaceStaleNode(self, stale, new = None):
+ """Replace a stale node in a bucket with a new one.
+
+ This is used by clients to replace a node returned by insertNode after
+ it fails to respond to a ping.
+
+ @type stale: L{node.Node}
+ @param stale: the stale node to remove from the bucket
+ @type new: L{node.Node}
+ @param new: the new node to add in its place (optional, defaults to
+ not adding any node in the old node's place)
+ """
+ # Find the stale node's bucket
+ i = self._bucketIndexForInt(stale.num)
+ try:
+ it = self.buckets[i].l.index(stale.num)
+ except ValueError:
+ return
+
+ # Remove the stale node and insert the new one
+ del(self.buckets[i].l[it])
+ if new:
+ self.buckets[i].l.append(new)
+
+ def insertNode(self, node, contacted = True):
+ """Try to insert a node in the routing table.
+
+ This inserts the node, returning None if successful, otherwise returns
+ the oldest node in the bucket if it's full. The caller is then
+ responsible for pinging the returned node and calling replaceStaleNode
+ if it doesn't respond. contacted means that yes, we contacted THEM and
+ we know the node is reachable.
+
+ @type node: L{node.Node}
+ @param node: the new node to try and insert
+ @type contacted: C{boolean}
+ @param contacted: whether the new node is known to be good, i.e.
+ responded to a request (optional, defaults to True)
+ @rtype: L{node.Node}
+ @return: None if successful (the bucket wasn't full), otherwise returns the oldest node in the bucket
+ """
+ assert node.id != NULL_ID
+ if node.id == self.node.id: return
+
+ # Get the bucket for this node
+ i = self._bucketIndexForInt(node.num)
+
+ # Check to see if node is in the bucket already
+ try:
+ it = self.buckets[i].l.index(node.num)
+ except ValueError:
+ pass
+ else:
+ # The node is already in the bucket
+ if contacted:
+ # It responded, so update it
+ node.updateLastSeen()
+ # move node to end of bucket
+ xnode = self.buckets[i].l[it]
+ del(self.buckets[i].l[it])
+ # note that we removed the original and replaced it with the new one
+ # utilizing this nodes new contact info
+ self.buckets[i].l.append(xnode)
+ self.buckets[i].touch()
+ return
+
+ # We don't have this node, check to see if the bucket is full
+ if len(self.buckets[i].l) < self.config['K']:
+ # Not full, append this node and return
+ if contacted:
+ node.updateLastSeen()
+ self.buckets[i].l.append(node)
+ self.buckets[i].touch()
+ return
+
+ # Bucket is full, check to see if the local node is not in the bucket
+ if not (self.buckets[i].min <= self.node < self.buckets[i].max):
+ # Local node not in the bucket, can't split it, return the oldest node
+ return self.buckets[i].l[0]
+
+ # Make sure our table isn't FULL, this is really unlikely
+ if len(self.buckets) >= self.config['HASH_LENGTH']:
+ log.err("Hash Table is FULL! Increase K!")
+ return
+
+ # This bucket is full and contains our node, split the bucket
+ self._splitBucket(self.buckets[i])
+
+ # Now that the bucket is split and balanced, try to insert the node again
+ return self.insertNode(node)
+
+ def justSeenNode(self, id):
+ """Mark a node as just having been seen.
+
+ Call this any time you get a message from a node, it will update it
+ in the table if it's there.
+
+ @type id: C{string} of C{int} or L{node.Node}
+ @param id: the node ID to mark as just having been seen
+ @rtype: C{datetime.datetime}
+ @return: the old lastSeen time of the node, or None if it's not in the table
+ """
+ try:
+ n = self.findNodes(id)[0]
+ except IndexError:
+ return None
+ else:
+ tstamp = n.lastSeen
+ n.updateLastSeen()
+ return tstamp
+
+ def invalidateNode(self, n):
+ """Remove the node from the routing table.
+
+ Forget about node n. Use this when you know that a node is invalid.
+ """
+ self.replaceStaleNode(n)
+
+ def nodeFailed(self, node):
+ """Mark a node as having failed once, and remove it if it has failed too much."""
+ try:
+ n = self.findNodes(node.num)[0]
+ except IndexError:
+ return None
+ else:
+ if n.msgFailed() >= self.config['MAX_FAILURES']:
+ self.invalidateNode(n)
+
+class KBucket:
+ """Single bucket of nodes in a kademlia-like routing table.
+
+ @type l: C{list} of L{node.Node}
+ @ivar l: the nodes that are in this bucket
+ @type min: C{long}
+ @ivar min: the minimum node ID that can be in this bucket
+ @type max: C{long}
+ @ivar max: the maximum node ID that can be in this bucket
+ @type lastAccessed: C{datetime.datetime}
+ @ivar lastAccessed: the last time a node in this bucket was successfully contacted
+ """
+
+ def __init__(self, contents, min, max):
+ """Initialize the bucket with nodes.
+
+ @type contents: C{list} of L{node.Node}
+ @param contents: the nodes to store in the bucket
+ @type min: C{long}
+ @param min: the minimum node ID that can be in this bucket
+ @type max: C{long}
+ @param max: the maximum node ID that can be in this bucket
+ """
+ self.l = contents
+ self.min = min
+ self.max = max
+ self.lastAccessed = datetime.now()
+
+ def touch(self):
+ """Update the L{lastAccessed} time."""
+ self.lastAccessed = datetime.now()
+
+ def getNodeWithInt(self, num):
+ """Get the node in the bucket with that number.
+
+ @type num: C{long}
+ @param num: the node ID to look for
+ @raise ValueError: if the node ID is not in the bucket
+ @rtype: L{node.Node}
+ @return: the node
+ """
+ # list.index uses Node.__eq__, which compares by number, and raises
+ # ValueError if the node is not present
+ return self.l[self.l.index(num)]
+
+ def __repr__(self):
+ return "<KBucket %d items (%d to %d)>" % (len(self.l), self.min, self.max)
+
+ #{ Comparators to bisect/index a list of buckets (by their range) with either a node or a long
+ def __lt__(self, a):
+ if isinstance(a, Node): a = a.num
+ return self.max <= a
+ def __le__(self, a):
+ if isinstance(a, Node): a = a.num
+ return self.min < a
+ def __gt__(self, a):
+ if isinstance(a, Node): a = a.num
+ return self.min > a
+ def __ge__(self, a):
+ if isinstance(a, Node): a = a.num
+ return self.max >= a
+ def __eq__(self, a):
+ if isinstance(a, Node): a = a.num
+ return self.min <= a and self.max > a
+ def __ne__(self, a):
+ if isinstance(a, Node): a = a.num
+ return self.min >= a or self.max < a
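+
+# Illustration: because a KBucket compares to a plain number by its
+# [min, max) range, bisect_left on the sorted bucket list locates the bucket
+# that should hold an ID. With buckets covering [0, 2**159) and
+# [2**159, 2**160), bisect_left(buckets, 2**159 + 1) returns 1.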
+
+class TestKTable(unittest.TestCase):
+ """Unit tests for the routing table."""
+
+ def setUp(self):
+ self.a = Node(khash.newID(), '127.0.0.1', 2002)
+ self.t = KTable(self.a, {'HASH_LENGTH': 160, 'K': 8, 'MAX_FAILURES': 3})
+
+ def testAddNode(self):
+ self.b = Node(khash.newID(), '127.0.0.1', 2003)
+ self.t.insertNode(self.b)
+ self.failUnlessEqual(len(self.t.buckets[0].l), 1)
+ self.failUnlessEqual(self.t.buckets[0].l[0], self.b)
+
+ def testRemove(self):
+ self.testAddNode()
+ self.t.invalidateNode(self.b)
+ self.failUnlessEqual(len(self.t.buckets[0].l), 0)
+
+ def testFail(self):
+ self.testAddNode()
+ for i in range(self.t.config['MAX_FAILURES'] - 1):
+ self.t.nodeFailed(self.b)
+ self.failUnlessEqual(len(self.t.buckets[0].l), 1)
+ self.failUnlessEqual(self.t.buckets[0].l[0], self.b)
+
+ self.t.nodeFailed(self.b)
+ self.failUnlessEqual(len(self.t.buckets[0].l), 0)
--- /dev/null
+## Copyright 2002-2003 Andrew Loewenstern, All Rights Reserved
+# see LICENSE.txt for license information
+
+"""Represents a node in the DHT.
+
+@type NULL_ID: C{string}
+@var NULL_ID: the node ID to use until one is known
+"""
+
+from datetime import datetime, MINYEAR
+from types import InstanceType
+
+from twisted.trial import unittest
+
+import khash
+from util import compact
+
+# magic id to use before we know a peer's id
+NULL_ID = 20 * '\0'
+
+class Node:
+ """Encapsulate a node's contact info.
+
+ @ivar conn: the connection to the remote node (added externally)
+ @ivar table: the routing table (added externally)
+ @type fails: C{int}
+ @ivar fails: number of times this node has failed in a row
+ @type lastSeen: C{datetime.datetime}
+ @ivar lastSeen: the last time a response was received from this node
+ @type id: C{string}
+ @ivar id: the node's ID in the DHT
+ @type num: C{long}
+ @ivar num: the node's ID in number form
+ @type host: C{string}
+ @ivar host: the IP address of the node
+ @type port: C{int}
+ @ivar port: the port of the node
+ @type token: C{string}
+ @ivar token: the last received token from the node
+ @type num_values: C{int}
+ @ivar num_values: the number of values the node has for the key in the
+ currently executing action
+ """
+
+ def __init__(self, id, host = None, port = None):
+ """Initialize the node.
+
+ @type id: C{string} or C{dictionary}
+ @param id: the node's ID in the DHT, or a dictionary containing the
+ node's id, host and port
+ @type host: C{string}
+ @param host: the IP address of the node
+ (optional, but must be specified if id is not a dictionary)
+ @type port: C{int}
+ @param port: the port of the node
+ (optional, but must be specified if id is not a dictionary)
+ """
+ self.fails = 0
+ self.lastSeen = datetime(MINYEAR, 1, 1)
+
+ # Alternate method, init Node from dictionary
+ if isinstance(id, dict):
+ host = id['host']
+ port = id['port']
+ id = id['id']
+
+ assert isinstance(id, str)
+ assert isinstance(host, str)
+ self.id = id
+ self.num = khash.intify(id)
+ self.host = host
+ self.port = int(port)
+ self.token = ''
+ self.num_values = 0
+ self._contactInfo = None
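+
+    # Illustrative usage only (the address and port here are hypothetical);
+    # the two construction forms accepted above are equivalent:
+    #   Node(some_id, '10.0.0.1', 9977)
+    #   Node({'id': some_id, 'host': '10.0.0.1', 'port': 9977})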
+
+ def updateLastSeen(self):
+ """Updates the last contact time of the node and resets the number of failures."""
+ self.lastSeen = datetime.now()
+ self.fails = 0
+
+ def updateToken(self, token):
+ """Update the token for the node."""
+ self.token = token
+
+ def updateNumValues(self, num_values):
+ """Update how many values the node has in the current search for a value."""
+ self.num_values = num_values
+
+ def msgFailed(self):
+ """Log a failed attempt to contact this node.
+
+ @rtype: C{int}
+ @return: the number of consecutive failures this node has
+ """
+ self.fails = self.fails + 1
+ return self.fails
+
+ def contactInfo(self):
+ """Get the compact contact info for the node."""
+ if self._contactInfo is None:
+ self._contactInfo = compact(self.id, self.host, self.port)
+ return self._contactInfo
+
+ def __repr__(self):
+        return repr((self.id, self.host, self.port))
+
+ #{ Comparators to bisect/index a list of nodes with either a node or a long
+ def __lt__(self, a):
+ if type(a) == InstanceType:
+ a = a.num
+ return self.num < a
+ def __le__(self, a):
+ if type(a) == InstanceType:
+ a = a.num
+ return self.num <= a
+ def __gt__(self, a):
+ if type(a) == InstanceType:
+ a = a.num
+ return self.num > a
+ def __ge__(self, a):
+ if type(a) == InstanceType:
+ a = a.num
+ return self.num >= a
+ def __eq__(self, a):
+ if type(a) == InstanceType:
+ a = a.num
+ return self.num == a
+ def __ne__(self, a):
+ if type(a) == InstanceType:
+ a = a.num
+ return self.num != a
+
+
+class TestNode(unittest.TestCase):
+ """Unit tests for the node implementation."""
+ def setUp(self):
+ self.node = Node(khash.newID(), '127.0.0.1', 2002)
+ def testUpdateLastSeen(self):
+ t = self.node.lastSeen
+ self.node.updateLastSeen()
+ self.failUnless(t < self.node.lastSeen)
+
\ No newline at end of file
--- /dev/null
+## Copyright 2002-2003 Andrew Loewenstern, All Rights Reserved
+# see LICENSE.txt for license information
+
+"""Some utitlity functions for use in apt-p2p's khashmir DHT."""
+
+from twisted.trial import unittest
+
+def bucket_stats(l):
+ """Given a list of khashmir instances, finds min, max, and average number of nodes in tables."""
+ max = avg = 0
+ min = None
+ def count(buckets):
+ c = 0
+ for bucket in buckets:
+ c = c + len(bucket.l)
+ return c
+ for node in l:
+ c = count(node.table.buckets)
+        if min is None or c < min:
+            min = c
+ if c > max:
+ max = c
+ avg = avg + c
+    avg = avg / float(len(l))  # avoid integer truncation of the average
+ return {'min':min, 'max':max, 'avg':avg}
+
+def uncompact(s):
+ """Extract the contact info from a compact node representation.
+
+ @type s: C{string}
+ @param s: the compact representation
+ @rtype: C{dictionary}
+ @return: the node ID, IP address and port to contact the node on
+    @raise ValueError: if the string is not a valid compact representation
+ """
+    if len(s) != 26:
+        raise ValueError("compact node info must be exactly 26 bytes")
+ id = s[:20]
+ host = '.'.join([str(ord(i)) for i in s[20:24]])
+ port = (ord(s[24]) << 8) | ord(s[25])
+ return {'id': id, 'host': host, 'port': port}
+
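+# A worked example of the compact format (hypothetical values): in a 26-byte
+# string whose last 6 bytes are '\xc0\xa8\x00\x01\x1f\x90', uncompact() reads
+# the host as '192.168.0.1' and the port as 8080 (0x1f << 8 | 0x90), and
+# returns the first 20 bytes unchanged as the node ID.
+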
+def compact(id, host, port):
+ """Create a compact representation of node contact info.
+
+ @type id: C{string}
+ @param id: the node ID
+ @type host: C{string}
+ @param host: the IP address of the node
+ @type port: C{int}
+ @param port: the port number to contact the node on
+ @rtype: C{string}
+ @return: the compact representation
+    @raise ValueError: if the contact info cannot be packed into 26 bytes
+        (e.g. the host is not a dotted-quad IPv4 address)
+ """
+
+ s = id + ''.join([chr(int(i)) for i in host.split('.')]) + \
+ chr((port & 0xFF00) >> 8) + chr(port & 0xFF)
+    if len(s) != 26:
+        raise ValueError("compact node info must be exactly 26 bytes")
+ return s
+
+class TestUtil(unittest.TestCase):
+ """Tests for the utilities."""
+
+ timeout = 5
+ myid = '\xca\xec\xb8\x0c\x00\xe7\x07\xf8~])\x8f\x9d\xe5_B\xff\x1a\xc4!'
+ host = '165.234.1.34'
+ port = 61234
+
+ def test_compact(self):
+ d = uncompact(compact(self.myid, self.host, self.port))
+ self.failUnlessEqual(d['id'], self.myid)
+ self.failUnlessEqual(d['host'], self.host)
+ self.failUnlessEqual(d['port'], self.port)
+
\ No newline at end of file
+++ /dev/null
-<!doctype refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN" [
-
- <!-- Fill in your name for FIRSTNAME and SURNAME. -->
- <!ENTITY dhfirstname "<firstname>Cameron</firstname>">
- <!ENTITY dhsurname "<surname>Dale</surname>">
- <!-- Please adjust the date whenever revising the manpage. -->
- <!ENTITY dhdate "<date>February 17, 2008</date>">
- <!-- SECTION should be 1-8, maybe w/ subsection other parameters are
- allowed: see man(7), man(1). -->
- <!ENTITY dhsection "<manvolnum>5</manvolnum>">
- <!ENTITY dhemail "<email>camrdale@gmail.com</email>">
- <!ENTITY dhusername "Cameron Dale">
- <!ENTITY dhucpackage "<refentrytitle>apt-p2p.conf</refentrytitle>">
- <!ENTITY dhpackage "apt-p2p">
-
- <!ENTITY debian "<productname>Debian</productname>">
- <!ENTITY gnu "<acronym>GNU</acronym>">
- <!ENTITY gpl "&gnu; <acronym>GPL</acronym>">
-]>
-
-<refentry>
- <refentryinfo>
- <address>
- &dhemail;
- </address>
- <author>
- &dhfirstname;
- &dhsurname;
- </author>
- <copyright>
- <year>2008</year>
- <holder>&dhusername;</holder>
- </copyright>
- &dhdate;
- </refentryinfo>
- <refmeta>
- &dhucpackage;
-
- &dhsection;
- </refmeta>
-
- <refnamediv>
- <refname>&dhpackage;</refname>
-
- <refpurpose>configuration file for &dhpackage;</refpurpose>
- </refnamediv>
-
- <refsect1>
- <title>DESCRIPTION</title>
-
- <para>Configuration information for &dhpackage; is searched for in the following order, with later
- entries overriding former ones:
- <OrderedList>
- <ListItem><para>/etc/apt-p2p/apt-p2p.conf</para></ListItem>
- <ListItem><para>${HOME}/.apt-p2p/apt-p2p.conf</para></ListItem>
- <ListItem><para>the location specified by the config-file parameter</para></ListItem>
- </OrderedList>
- </para>
-
- </refsect1>
- <refsect1>
- <title>FORMAT</title>
-
- <para>&dhpackage; has a structure similar to Microsoft Windows INI files.
- The configuration file consists of sections, led by a ``[section]'' header and followed
- by ``name = value'' or ``name: value'' entries, with continuations in the style of RFC 822
- (values can span multiple lines by starting the subsequent lines with one or more spaces).
- Some values indicate times, in which case a suffix of 'd' for
- days, 'h' for hours, 'm' for minutes, and 's' for seconds can be used.
- Boolean values can be '1', 'yes', 'true', and 'on' to evaluate to True,
- or '0', 'no', 'false', and 'off' to evaluate to false.
- Note that leading whitespace is removed from values, and case is not important.
- Lines beginning with "#" or ";" are ignored and may be used to provide comments.</para>
-
- </refsect1>
- <refsect1>
- <title>VARIABLES</title>
- <para>There are 2 required sections in the config file. The first is the DEFAULT section, providing
- variables for the configuration of the main application. The second is the section that provides
- variables for the configuration of the DHT.</para>
-
- <refsect2>
- <title>DEFAULT</title>
-
- <variablelist>
- <varlistentry>
- <term><option>PORT = <replaceable>number</replaceable></option></term>
- <listitem>
- <para>The <replaceable>number</replaceable> of the port to listen on for requests.
- The main application will use this TCP port to listen for requests from APT, and
- for uploads to other peers. If a port is not specified for the DHT, it will also
- use this UDP port to listen for DHT requests.
- (Default is 9977.)</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>CACHE_DIR = <replaceable>directory</replaceable></option></term>
- <listitem>
- <para>The <replaceable>directory</replaceable> to store the downloaded files in.
- (Default is ${HOME}/.apt-p2p/cache.)</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>OTHER_DIRS = <replaceable>list</replaceable></option></term>
- <listitem>
- <para>The <replaceable>list</replaceable> of directories containing packages to share with others.
- All files in these directories will be hashed and available for everybody to download.
- (Default is to share only the files downloaded.)</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>LOCAL_OK = <replaceable>boolean</replaceable></option></term>
- <listitem>
- <para>Whether it's OK to use an IP addres from a known local or private range.
- (Default is false)</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>UNLOAD_PACKAGES_CACHE = <replaceable>time</replaceable></option></term>
- <listitem>
- <para>The <replaceable>time</replaceable> of inactivity to wait for before unloading the
- packages cache. The packages cache uses a lot of memory, and only takes a few seconds
- to reload when a new request arrives. (Default is 5 minutes.)</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>KEY_REFRESH = <replaceable>time</replaceable></option></term>
- <listitem>
- <para>The <replaceable>time</replaceable> after which to refresh DHT keys.
- This should be a time slightly less than the DHT's KEY_EXPIRE value.
- (Default is 57 minutes.)</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>DHT = <replaceable>string</replaceable></option></term>
- <listitem>
- <para>The DHT implementation to use. It must be possile to do (in python)
- ``from <DHT>.DHT import DHT'' to get a class that implements the IDHT interface.
- There should also be a similarly named section below to specify the options for the DHT.
- (Default is `apt_p2p_Khashmir')</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>DHT-ONLY = <replaceable>boolean</replaceable></option></term>
- <listitem>
- <para>Whether to only run the DHT. This can be useful for providing only a bootstrap node.
- (Default is false)</para>
- </listitem>
- </varlistentry>
- </variablelist>
- </refsect2>
- <refsect2>
- <title>apt_p2p_Khashmir</title>
-
- <variablelist>
- <varlistentry>
- <term><option>PORT = <replaceable>number</replaceable></option></term>
- <listitem>
- <para>The <replaceable>number</replaceable> of the port to listen on for DHT (UDP) requests.
- (Default is to use the value specified in the DEFAULT section.)</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>BOOTSTRAP = <replaceable>list</replaceable></option></term>
- <listitem>
- <para>The <replaceable>list</replaceable> of bootstrap nodes to contact to join the DHT.
- Each node should be on a separate line, and start with the IP address or host name,
- followed by a colon and the port number.
- (Default is a list of known good nodes.)</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>BOOTSTRAP_NODE = <replaceable>boolean</replaceable></option></term>
- <listitem>
- <para>Whether this node is a bootstrap node.
- (Default is false)</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>K = <replaceable>number</replaceable></option></term>
- <listitem>
- <para>The <replaceable>number</replaceable> of the Kademlia "K" constant.
- It should be an even number.
- (Default is 8.)</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>HASH_LENGTH = <replaceable>number</replaceable></option></term>
- <listitem>
- <para>The <replaceable>number</replaceable> of bits in the hash to use.
- (Default is 160.)</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>CHECKPOINT_INTERVAL = <replaceable>time</replaceable></option></term>
- <listitem>
- <para>The <replaceable>time</replaceable> to wait between saves of the running state.
- (Default is 5 minutes.)</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>CONCURRENT_REQS = <replaceable>number</replaceable></option></term>
- <listitem>
- <para>The concurrent <replaceable>number</replaceable> of calls per find node/value request.
- (Default is 4.)</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>STORE_REDUNDANCY = <replaceable>number</replaceable></option></term>
- <listitem>
- <para>The <replaceable>number</replaceable> of redundant copies of a value to store in the DHT.
- (Default is 3.)</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>RETRIEVE_VALUES = <replaceable>number</replaceable></option></term>
- <listitem>
- <para>The <replaceable>number</replaceable> of values to attempt to retrieve from the DHT.
- Setting this to 0 will try and get all values (which could take a while if
- a lot of nodes have values). Setting it negative will try to get that
- number of results from only the closest STORE_REDUNDANCY nodes to the hash.
- (Default is -10000, which is a large negative number so all values from the closest
- STORE_REDUNDANCY nodes will be retrieved.)</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>MAX_FAILURES = <replaceable>number</replaceable></option></term>
- <listitem>
- <para>The <replaceable>number</replaceable> of times in a row a node can fail to
- respond before it's booted from the routing table.
- (Default is 3.)</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>MIN_PING_INTERVAL = <replaceable>time</replaceable></option></term>
- <listitem>
- <para>The minimum <replaceable>time</replaceable> to wait before re-pinging a node.
- (Default is 15 minutes.)</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>BUCKET_STALENESS = <replaceable>time</replaceable></option></term>
- <listitem>
- <para>The maximum <replaceable>time</replaceable> to wait before refreshing a bucket.
- (Default is 1 hour.)</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>KEY_EXPIRE = <replaceable>time</replaceable></option></term>
- <listitem>
- <para>The <replaceable>time</replaceable> to wait before expiring unrefreshed keys.
- (Default is 1 hour.)</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>SPEW = <replaceable>boolean</replaceable></option></term>
- <listitem>
- <para>Whether to log lots of info about the requests and responses in the protocol.
- (Default is false)</para>
- </listitem>
- </varlistentry>
- </variablelist>
- </refsect2>
- </refsect1>
-
- <refsect1>
- <title>SEE ALSO</title>
-
- <para>
- <citerefentry><refentrytitle>apt-p2p</refentrytitle><manvolnum>8</manvolnum></citerefentry>
- </para>
- </refsect1>
- <refsect1>
- <title>AUTHOR</title>
- <para>This manual page was written by &dhusername; <&dhemail;> for
- the &debian; system (but may be used by others). Permission is
- granted to copy, distribute and/or modify this document under
- the terms of the &gnu; General Public License, Version 2 or any
- later version published by the Free Software Foundation.
- </para>
- <para>
- On Debian systems, the complete text of the GNU General Public
- License can be found in /usr/share/common-licenses/GPL.
- </para>
- </refsect1>
-</refentry>
-
-<!-- Keep this comment at the end of the file
-Local variables:
-mode: sgml
-sgml-omittag:t
-sgml-shorttag:t
-sgml-minimize-attributes:nil
-sgml-always-quote-attributes:t
-sgml-indent-step:2
-sgml-indent-data:t
-sgml-parent-document:nil
-sgml-default-dtd-file:nil
-sgml-exposed-tags:nil
-sgml-local-catalogs:nil
-sgml-local-ecat-files:nil
-End:
--->
+++ /dev/null
-<!doctype refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN" [
-
- <!-- Fill in your name for FIRSTNAME and SURNAME. -->
- <!ENTITY dhfirstname "<firstname>Cameron</firstname>">
- <!ENTITY dhsurname "<surname>Dale</surname>">
- <!-- Please adjust the date whenever revising the manpage. -->
- <!ENTITY dhdate "<date>February 17, 2008</date>">
- <!-- SECTION should be 1-8, maybe w/ subsection other parameters are
- allowed: see man(7), man(1). -->
- <!ENTITY dhsection "<manvolnum>8</manvolnum>">
- <!ENTITY dhemail "<email>camrdale@gmail.com</email>">
- <!ENTITY dhusername "Cameron Dale">
- <!ENTITY dhucpackage "<refentrytitle>apt-p2p</refentrytitle>">
- <!ENTITY dhpackage "apt-p2p">
-
- <!ENTITY debian "<productname>Debian</productname>">
- <!ENTITY gnu "<acronym>GNU</acronym>">
- <!ENTITY gpl "&gnu; <acronym>GPL</acronym>">
-]>
-
-<refentry>
- <refentryinfo>
- <address>
- &dhemail;
- </address>
- <author>
- &dhfirstname;
- &dhsurname;
- </author>
- <copyright>
- <year>2008</year>
- <holder>&dhusername;</holder>
- </copyright>
- &dhdate;
- </refentryinfo>
- <refmeta>
- &dhucpackage;
-
- &dhsection;
- </refmeta>
-
- <refnamediv>
- <refname>&dhpackage;</refname>
-
- <refpurpose>apt helper for peer-to-peer downloads of Debian packages</refpurpose>
- </refnamediv>
- <refsynopsisdiv>
- <para>Normally &dhpackage; is run from init.d using <command>twistd</command>, in which case no &dhpackage;
- options can be specified on the command-line, and all configuration variables are
- read from the default config file locations of ${HOME}/.apt-p2p.conf and /etc/apt-p2p/apt-p2p.conf.
- The command is then:</para>
- <cmdsynopsis>
- <command>twistd</command> <arg choice="Req">-y /path/to/&dhpackage;</arg> <arg rep="Repeat"><option>twistd options</option></arg>
- </cmdsynopsis>
- <para>For a detailed listing of the twistd options, see
- <citerefentry><refentrytitle>twistd</refentrytitle><manvolnum>1</manvolnum></citerefentry></para>
- <para>&dhpackage; can also be run without twistd by executing:</para>
- <cmdsynopsis>
- <command>&dhpackage;</command> <arg rep="Repeat"><option>options</option></arg>
- </cmdsynopsis>
- <para>In this case, you can specify the options documented below on the command-line.</para>
- </refsynopsisdiv>
- <refsect1>
- <title>DESCRIPTION</title>
-
- <para>This manual page documents briefly the options available to the &dhpackage; command.</para>
-
- <para><command>&dhpackage;</command> is a helper for downloading Debian packages files with APT.
- It will download any needed files from other Apt-P2P peers in a
- bittorrent-like manner, and so reduce the strain on the Debian mirrors.</para>
-
- <para>In order for APT to send it's requests to &dhpackage;, the source.list entries must be modified to point to the
- local &dhpackage; address. Unless you have changed the default port, then adding "localhost:9977/" to the beginning
- of each entry should be sufficient. For example, if your sources.list contains a line like this:</para>
-
- <para>deb http://ftp.us.debian.org/debian etch main contrib non-free</para>
-
- <para>then replace it with this:</para>
-
- <para>deb http://localhost:9977/ftp.us.debian.org/debian etch main contrib non-free</para>
-
- <para>The port can be changed in the &dhpackage; config file in /etc/&dhpackage;/&dhpackage;.conf,
- but it defaults to 9977.</para>
- </refsect1>
- <refsect1>
- <title>OPTIONS</title>
-
- <para>These programs follow the usual &gnu; command line syntax,
- with short options starting with a single dash (`-'),
- and long options starting with two dashes (`--').
- A summary of options is included below.</para>
-
- <variablelist>
- <varlistentry>
- <term><option>-c <replaceable>filename</replaceable>,</option> <option>--configfile=<replaceable>filename</replaceable></option></term>
- <listitem>
- <para>the <replaceable>filename</replaceable> to use for the configuration file, options found in this
- file are combined with those in ${HOME}/.apt-p2p/apt-p2p.conf and /etc/apt-p2p/apt-p2p.conf
- (see <citerefentry><refentrytitle>apt-p2p.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry> for the format of the file)</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>-l <replaceable>filename</replaceable>,</option> <option>--logfile=<replaceable>filename</replaceable></option></term>
- <listitem>
- <para>the <replaceable>filename</replaceable> to print log messages to,
- or `-' to log to standard output, if not specified then
- /var/log/apt-p2p.log will be used</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>-h</option>, <option>--help</option></term>
- <listitem>
- <para>print a help message describing the invocation of the program</para>
- </listitem>
- </varlistentry>
- <varlistentry>
- <term><option>-v</option>, <option>--version</option></term>
- <listitem>
- <para>print the version of the program</para>
- </listitem>
- </varlistentry>
- </variablelist>
- </refsect1>
-
- <refsect1>
- <title>SEE ALSO</title>
-
- <para>
- <citerefentry><refentrytitle>apt-p2p.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>,
- <citerefentry><refentrytitle>twistd</refentrytitle><manvolnum>1</manvolnum></citerefentry>
- </para>
- </refsect1>
- <refsect1>
- <title>AUTHOR</title>
- <para>This manual page was written by &dhusername; <&dhemail;> for
- the &debian; system (but may be used by others). Permission is
- granted to copy, distribute and/or modify this document under
- the terms of the &gnu; General Public License, Version 2 or any
- later version published by the Free Software Foundation.
- </para>
- <para>
- On Debian systems, the complete text of the GNU General Public
- License can be found in /usr/share/common-licenses/GPL.
- </para>
- </refsect1>
-</refentry>
-
-<!-- Keep this comment at the end of the file
-Local variables:
-mode: sgml
-sgml-omittag:t
-sgml-shorttag:t
-sgml-minimize-attributes:nil
-sgml-always-quote-attributes:t
-sgml-indent-step:2
-sgml-indent-data:t
-sgml-parent-document:nil
-sgml-default-dtd-file:nil
-sgml-exposed-tags:nil
-sgml-local-catalogs:nil
-sgml-local-ecat-files:nil
-End:
--->
--- /dev/null
+<!doctype refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN" [
+
+ <!-- Fill in your name for FIRSTNAME and SURNAME. -->
+ <!ENTITY dhfirstname "<firstname>Cameron</firstname>">
+ <!ENTITY dhsurname "<surname>Dale</surname>">
+ <!-- Please adjust the date whenever revising the manpage. -->
+ <!ENTITY dhdate "<date>February 17, 2008</date>">
+ <!-- SECTION should be 1-8, maybe w/ subsection other parameters are
+ allowed: see man(7), man(1). -->
+ <!ENTITY dhsection "<manvolnum>5</manvolnum>">
+ <!ENTITY dhemail "<email>camrdale@gmail.com</email>">
+ <!ENTITY dhusername "Cameron Dale">
+ <!ENTITY dhucpackage "<refentrytitle>apt-p2p.conf</refentrytitle>">
+ <!ENTITY dhpackage "apt-p2p">
+
+ <!ENTITY debian "<productname>Debian</productname>">
+ <!ENTITY gnu "<acronym>GNU</acronym>">
+ <!ENTITY gpl "&gnu; <acronym>GPL</acronym>">
+]>
+
+<refentry>
+ <refentryinfo>
+ <address>
+ &dhemail;
+ </address>
+ <author>
+ &dhfirstname;
+ &dhsurname;
+ </author>
+ <copyright>
+ <year>2008</year>
+ <holder>&dhusername;</holder>
+ </copyright>
+ &dhdate;
+ </refentryinfo>
+ <refmeta>
+ &dhucpackage;
+
+ &dhsection;
+ </refmeta>
+
+ <refnamediv>
+ <refname>&dhpackage;</refname>
+
+ <refpurpose>configuration file for &dhpackage;</refpurpose>
+ </refnamediv>
+
+ <refsect1>
+ <title>DESCRIPTION</title>
+
+ <para>Configuration information for &dhpackage; is searched for in the following order, with later
+ entries overriding former ones:
+ <OrderedList>
+ <ListItem><para>/etc/apt-p2p/apt-p2p.conf</para></ListItem>
+ <ListItem><para>${HOME}/.apt-p2p/apt-p2p.conf</para></ListItem>
+ <ListItem><para>the location specified by the config-file parameter</para></ListItem>
+ </OrderedList>
+ </para>
+
+ </refsect1>
+ <refsect1>
+ <title>FORMAT</title>
+
+    <para>The &dhpackage; configuration file has a structure similar to that of Microsoft Windows INI files.
+    It consists of sections, led by a ``[section]'' header and followed
+ by ``name = value'' or ``name: value'' entries, with continuations in the style of RFC 822
+ (values can span multiple lines by starting the subsequent lines with one or more spaces).
+ Some values indicate times, in which case a suffix of 'd' for
+ days, 'h' for hours, 'm' for minutes, and 's' for seconds can be used.
+    Boolean values can be '1', 'yes', 'true', or 'on' to evaluate to True,
+    or '0', 'no', 'false', or 'off' to evaluate to False.
+ Note that leading whitespace is removed from values, and case is not important.
+ Lines beginning with "#" or ";" are ignored and may be used to provide comments.</para>
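+
+    <para>For example, the following fragment (purely illustrative, not a
+    complete configuration) shows a time value using a suffix and a value
+    continued onto a second line:</para>
+    <literallayout>
+[DEFAULT]
+UNLOAD_PACKAGES_CACHE = 10m
+OTHER_DIRS = /var/www/debian
+    /srv/mirror/debian
+    </literallayout>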
+
+ </refsect1>
+ <refsect1>
+ <title>VARIABLES</title>
+    <para>There are two required sections in the config file. The first is the DEFAULT section, providing
+ variables for the configuration of the main application. The second is the section that provides
+ variables for the configuration of the DHT.</para>
+
+ <refsect2>
+ <title>DEFAULT</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><option>PORT = <replaceable>number</replaceable></option></term>
+ <listitem>
+ <para>The <replaceable>number</replaceable> of the port to listen on for requests.
+ The main application will use this TCP port to listen for requests from APT, and
+ for uploads to other peers. If a port is not specified for the DHT, it will also
+ use this UDP port to listen for DHT requests.
+ (Default is 9977.)</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>CACHE_DIR = <replaceable>directory</replaceable></option></term>
+ <listitem>
+ <para>The <replaceable>directory</replaceable> to store the downloaded files in.
+ (Default is ${HOME}/.apt-p2p/cache.)</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>OTHER_DIRS = <replaceable>list</replaceable></option></term>
+ <listitem>
+ <para>The <replaceable>list</replaceable> of directories containing packages to share with others.
+ All files in these directories will be hashed and available for everybody to download.
+ (Default is to share only the files downloaded.)</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>LOCAL_OK = <replaceable>boolean</replaceable></option></term>
+ <listitem>
+          <para>Whether it's OK to use an IP address from a known local or private range.
+            (Default is false.)</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>UNLOAD_PACKAGES_CACHE = <replaceable>time</replaceable></option></term>
+ <listitem>
+          <para>The <replaceable>time</replaceable> of inactivity to wait before unloading the
+ packages cache. The packages cache uses a lot of memory, and only takes a few seconds
+ to reload when a new request arrives. (Default is 5 minutes.)</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>KEY_REFRESH = <replaceable>time</replaceable></option></term>
+ <listitem>
+ <para>The <replaceable>time</replaceable> after which to refresh DHT keys.
+ This should be a time slightly less than the DHT's KEY_EXPIRE value.
+ (Default is 57 minutes.)</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>DHT = <replaceable>string</replaceable></option></term>
+ <listitem>
+          <para>The DHT implementation to use. It must be possible to do (in Python)
+            ``from <DHT>.DHT import DHT'' to get a class that implements the IDHT interface.
+            There should also be a similarly named section below to specify the options for the DHT.
+            (Default is `apt_p2p_Khashmir'.)</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>DHT-ONLY = <replaceable>boolean</replaceable></option></term>
+ <listitem>
+          <para>Whether to run only the DHT. This can be useful for providing only a bootstrap node.
+            (Default is false.)</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+ <refsect2>
+ <title>apt_p2p_Khashmir</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><option>PORT = <replaceable>number</replaceable></option></term>
+ <listitem>
+ <para>The <replaceable>number</replaceable> of the port to listen on for DHT (UDP) requests.
+ (Default is to use the value specified in the DEFAULT section.)</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>BOOTSTRAP = <replaceable>list</replaceable></option></term>
+ <listitem>
+ <para>The <replaceable>list</replaceable> of bootstrap nodes to contact to join the DHT.
+ Each node should be on a separate line, and start with the IP address or host name,
+ followed by a colon and the port number.
+ (Default is a list of known good nodes.)</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>BOOTSTRAP_NODE = <replaceable>boolean</replaceable></option></term>
+ <listitem>
+ <para>Whether this node is a bootstrap node.
+            (Default is false.)</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>K = <replaceable>number</replaceable></option></term>
+ <listitem>
+ <para>The <replaceable>number</replaceable> of the Kademlia "K" constant.
+ It should be an even number.
+ (Default is 8.)</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>HASH_LENGTH = <replaceable>number</replaceable></option></term>
+ <listitem>
+ <para>The <replaceable>number</replaceable> of bits in the hash to use.
+ (Default is 160.)</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>CHECKPOINT_INTERVAL = <replaceable>time</replaceable></option></term>
+ <listitem>
+ <para>The <replaceable>time</replaceable> to wait between saves of the running state.
+ (Default is 5 minutes.)</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>CONCURRENT_REQS = <replaceable>number</replaceable></option></term>
+ <listitem>
+          <para>The <replaceable>number</replaceable> of concurrent calls per find node/value request.
+ (Default is 4.)</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>STORE_REDUNDANCY = <replaceable>number</replaceable></option></term>
+ <listitem>
+ <para>The <replaceable>number</replaceable> of redundant copies of a value to store in the DHT.
+ (Default is 3.)</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>RETRIEVE_VALUES = <replaceable>number</replaceable></option></term>
+ <listitem>
+ <para>The <replaceable>number</replaceable> of values to attempt to retrieve from the DHT.
+            Setting this to 0 will try to get all values (which could take a while if
+            a lot of nodes have values). Setting it to a negative number will try to get that
+            many results from only the STORE_REDUNDANCY nodes closest to the hash.
+ (Default is -10000, which is a large negative number so all values from the closest
+ STORE_REDUNDANCY nodes will be retrieved.)</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>MAX_FAILURES = <replaceable>number</replaceable></option></term>
+ <listitem>
+ <para>The <replaceable>number</replaceable> of times in a row a node can fail to
+ respond before it's booted from the routing table.
+ (Default is 3.)</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>MIN_PING_INTERVAL = <replaceable>time</replaceable></option></term>
+ <listitem>
+ <para>The minimum <replaceable>time</replaceable> to wait before re-pinging a node.
+ (Default is 15 minutes.)</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>BUCKET_STALENESS = <replaceable>time</replaceable></option></term>
+ <listitem>
+ <para>The maximum <replaceable>time</replaceable> to wait before refreshing a bucket.
+ (Default is 1 hour.)</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>KEY_EXPIRE = <replaceable>time</replaceable></option></term>
+ <listitem>
+ <para>The <replaceable>time</replaceable> to wait before expiring unrefreshed keys.
+ (Default is 1 hour.)</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>SPEW = <replaceable>boolean</replaceable></option></term>
+ <listitem>
+ <para>Whether to log lots of info about the requests and responses in the protocol.
+            (Default is false.)</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+ </refsect1>
+
+ <refsect1>
+ <title>SEE ALSO</title>
+
+ <para>
+ <citerefentry><refentrytitle>apt-p2p</refentrytitle><manvolnum>8</manvolnum></citerefentry>
+ </para>
+ </refsect1>
+ <refsect1>
+ <title>AUTHOR</title>
+ <para>This manual page was written by &dhusername; <&dhemail;> for
+ the &debian; system (but may be used by others). Permission is
+ granted to copy, distribute and/or modify this document under
+ the terms of the &gnu; General Public License, Version 2 or any
+ later version published by the Free Software Foundation.
+ </para>
+ <para>
+ On Debian systems, the complete text of the GNU General Public
+ License can be found in /usr/share/common-licenses/GPL.
+ </para>
+ </refsect1>
+</refentry>
+
+<!-- Keep this comment at the end of the file
+Local variables:
+mode: sgml
+sgml-omittag:t
+sgml-shorttag:t
+sgml-minimize-attributes:nil
+sgml-always-quote-attributes:t
+sgml-indent-step:2
+sgml-indent-data:t
+sgml-parent-document:nil
+sgml-default-dtd-file:nil
+sgml-exposed-tags:nil
+sgml-local-catalogs:nil
+sgml-local-ecat-files:nil
+End:
+-->
--- /dev/null
+<!doctype refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN" [
+
+ <!-- Fill in your name for FIRSTNAME and SURNAME. -->
+ <!ENTITY dhfirstname "<firstname>Cameron</firstname>">
+ <!ENTITY dhsurname "<surname>Dale</surname>">
+ <!-- Please adjust the date whenever revising the manpage. -->
+ <!ENTITY dhdate "<date>February 17, 2008</date>">
+ <!-- SECTION should be 1-8, maybe w/ subsection other parameters are
+ allowed: see man(7), man(1). -->
+ <!ENTITY dhsection "<manvolnum>8</manvolnum>">
+ <!ENTITY dhemail "<email>camrdale@gmail.com</email>">
+ <!ENTITY dhusername "Cameron Dale">
+ <!ENTITY dhucpackage "<refentrytitle>apt-p2p</refentrytitle>">
+ <!ENTITY dhpackage "apt-p2p">
+
+ <!ENTITY debian "<productname>Debian</productname>">
+ <!ENTITY gnu "<acronym>GNU</acronym>">
+ <!ENTITY gpl "&gnu; <acronym>GPL</acronym>">
+]>
+
+<refentry>
+ <refentryinfo>
+ <address>
+ &dhemail;
+ </address>
+ <author>
+ &dhfirstname;
+ &dhsurname;
+ </author>
+ <copyright>
+ <year>2008</year>
+ <holder>&dhusername;</holder>
+ </copyright>
+ &dhdate;
+ </refentryinfo>
+ <refmeta>
+ &dhucpackage;
+
+ &dhsection;
+ </refmeta>
+
+ <refnamediv>
+ <refname>&dhpackage;</refname>
+
+ <refpurpose>apt helper for peer-to-peer downloads of Debian packages</refpurpose>
+ </refnamediv>
+ <refsynopsisdiv>
+ <para>Normally &dhpackage; is run from init.d using <command>twistd</command>, in which case no &dhpackage;
+ options can be specified on the command-line, and all configuration variables are
+    read from the default config file locations of ${HOME}/.apt-p2p/apt-p2p.conf and /etc/apt-p2p/apt-p2p.conf.
+ The command is then:</para>
+ <cmdsynopsis>
+ <command>twistd</command> <arg choice="Req">-y /path/to/&dhpackage;</arg> <arg rep="Repeat"><option>twistd options</option></arg>
+ </cmdsynopsis>
+ <para>For a detailed listing of the twistd options, see
+ <citerefentry><refentrytitle>twistd</refentrytitle><manvolnum>1</manvolnum></citerefentry></para>
+ <para>&dhpackage; can also be run without twistd by executing:</para>
+ <cmdsynopsis>
+ <command>&dhpackage;</command> <arg rep="Repeat"><option>options</option></arg>
+ </cmdsynopsis>
+ <para>In this case, you can specify the options documented below on the command-line.</para>
+ </refsynopsisdiv>
+ <refsect1>
+ <title>DESCRIPTION</title>
+
+ <para>This manual page documents briefly the options available to the &dhpackage; command.</para>
+
+    <para><command>&dhpackage;</command> is a helper for downloading Debian package files with APT.
+    It will download any needed files from other Apt-P2P peers in a
+    BitTorrent-like manner, reducing the strain on the Debian mirrors.</para>
+
+    <para>In order for APT to send its requests to &dhpackage;, the sources.list entries must be modified to point to the
+    local &dhpackage; address. Unless you have changed the default port, adding "localhost:9977/" to the beginning
+    of each entry should be sufficient. For example, if your sources.list contains a line like this:</para>
+
+ <para>deb http://ftp.us.debian.org/debian etch main contrib non-free</para>
+
+ <para>then replace it with this:</para>
+
+ <para>deb http://localhost:9977/ftp.us.debian.org/debian etch main contrib non-free</para>
+
+ <para>The port can be changed in the &dhpackage; config file in /etc/&dhpackage;/&dhpackage;.conf,
+ but it defaults to 9977.</para>
+ </refsect1>
+ <refsect1>
+ <title>OPTIONS</title>
+
+ <para>These programs follow the usual &gnu; command line syntax,
+ with short options starting with a single dash (`-'),
+ and long options starting with two dashes (`--').
+ A summary of options is included below.</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><option>-c <replaceable>filename</replaceable>,</option> <option>--configfile=<replaceable>filename</replaceable></option></term>
+ <listitem>
+          <para>the <replaceable>filename</replaceable> to use for the configuration file; options found in this
+            file are combined with those in ${HOME}/.apt-p2p/apt-p2p.conf and /etc/apt-p2p/apt-p2p.conf
+ (see <citerefentry><refentrytitle>apt-p2p.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry> for the format of the file)</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>-l <replaceable>filename</replaceable>,</option> <option>--logfile=<replaceable>filename</replaceable></option></term>
+ <listitem>
+          <para>the <replaceable>filename</replaceable> to print log messages to,
+            or `-' to log to standard output; if not specified,
+            /var/log/apt-p2p.log will be used</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>-h</option>, <option>--help</option></term>
+ <listitem>
+ <para>print a help message describing the invocation of the program</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><option>-v</option>, <option>--version</option></term>
+ <listitem>
+ <para>print the version of the program</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>SEE ALSO</title>
+
+ <para>
+ <citerefentry><refentrytitle>apt-p2p.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>,
+ <citerefentry><refentrytitle>twistd</refentrytitle><manvolnum>1</manvolnum></citerefentry>
+ </para>
+ </refsect1>
+ <refsect1>
+ <title>AUTHOR</title>
+ <para>This manual page was written by &dhusername; <&dhemail;> for
+ the &debian; system (but may be used by others). Permission is
+ granted to copy, distribute and/or modify this document under
+ the terms of the &gnu; General Public License, Version 2 or any
+ later version published by the Free Software Foundation.
+ </para>
+ <para>
+ On Debian systems, the complete text of the GNU General Public
+ License can be found in /usr/share/common-licenses/GPL.
+ </para>
+ </refsect1>
+</refentry>
+
+<!-- Keep this comment at the end of the file
+Local variables:
+mode: sgml
+sgml-omittag:t
+sgml-shorttag:t
+sgml-minimize-attributes:nil
+sgml-always-quote-attributes:t
+sgml-indent-step:2
+sgml-indent-data:t
+sgml-parent-document:nil
+sgml-default-dtd-file:nil
+sgml-exposed-tags:nil
+sgml-local-catalogs:nil
+sgml-local-ecat-files:nil
+End:
+-->
+++ /dev/null
-[General]
-img_extIsRegExp=false
-img_extensions=.eps .pdf .dvi .ps .fig .gif .jpg .jpeg .png
-kileprversion=1
-kileversion=1.9.2
-lastDocument=motivation.tex
-masterDocument=
-name=apt-p2p-motivation
-pkg_extIsRegExp=false
-pkg_extensions=.cls .sty .dtx
-src_extIsRegExp=false
-src_extensions=.tex .ltx .bib .mp
-
-[Tools]
-MakeIndex=
-QuickBuild=LaTeX+DVItoPDF+ViewPDF
-
-[item:all.bib]
-archive=true
-column=20
-encoding=UTF-8
-highlight=BibTeX
-line=225
-open=true
-
-[item:apt-p2p-motivation.kilepr]
-archive=true
-column=0
-encoding=
-highlight=
-line=0
-open=false
-
-[item:motivation.tex]
-archive=true
-column=0
-encoding=UTF-8
-highlight=LaTeX
-line=347
-open=true
--- /dev/null
+[General]
+img_extIsRegExp=false
+img_extensions=.eps .pdf .dvi .ps .fig .gif .jpg .jpeg .png
+kileprversion=1
+kileversion=1.9.2
+lastDocument=motivation.tex
+masterDocument=
+name=apt-p2p-motivation
+pkg_extIsRegExp=false
+pkg_extensions=.cls .sty .dtx
+src_extIsRegExp=false
+src_extensions=.tex .ltx .bib .mp
+
+[Tools]
+MakeIndex=
+QuickBuild=LaTeX+DVItoPDF+ViewPDF
+
+[item:all.bib]
+archive=true
+column=20
+encoding=UTF-8
+highlight=BibTeX
+line=225
+open=true
+
+[item:apt-p2p-motivation.kilepr]
+archive=true
+column=0
+encoding=
+highlight=
+line=0
+open=false
+
+[item:motivation.tex]
+archive=true
+column=0
+encoding=UTF-8
+highlight=LaTeX
+line=347
+open=true
+++ /dev/null
-%!PS-Adobe-2.0 EPSF-1.2
-%% [MATLAB-generated EPS figure (apt_dht_simulation-size_CDF, 2008-02-08):
-%%  a cumulative-distribution plot of package size, x-axis "Package Size (kB)"
-%%  on a log scale, y-axis "Cumulative Distribution" from 0 to 1, with two
-%%  curves, "By Number" (solid) and "By Popularity" (dashed). The remainder
-%%  of the file is machine-generated PostScript drawing code.]
-%%EOF
--- /dev/null
+[EPS figure data omitted: MATLAB 7.5 (R2007b) output, /cs/grad1/camerond/school/matlab/cache/apt_dht_simulation-size_CDF.20080208T171700.eps, created 02/08/2008; figure shows the cumulative distribution of package size; x-axis "Package Size (kB)", log scale 10^0 to 10^5; y-axis "Cumulative Distribution", 0 to 1; legend: "By Number" (solid line), "By Popularity" (dashed line).]