From: Cameron Dale
Date: Thu, 6 Mar 2008 00:17:03 +0000 (-0800)
Subject: Rename all apt-dht files to apt-p2p.
X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=7b1167d8ce780312d3689c9309c7e9c64060c085;p=quix0rs-apt-p2p.git

Rename all apt-dht files to apt-p2p.
---

diff --git a/apt-dht.conf b/apt-dht.conf
deleted file mode 100644
index 9a12eed..0000000
--- a/apt-dht.conf
+++ /dev/null
@@ -1,101 +0,0 @@
-# The apt-p2p configuration file.
-#
-# This is an ini-type configuration file, using sections identified by
-# square brackets. Values are specified on a single line using the '='
-# sign. Some values indicate times, in which case a suffix of 'd' for
-# days, 'h' for hours, 'm' for minutes, and 's' for seconds can be used.
-# Some values can span multiple lines by starting the subsequent lines
-# with one or more spaces.
-#
-######################### DEFAULT ###################################
-# This is the default section containing the configuration options for the
-# main application.
-[DEFAULT]
-
-# The number of the port to listen on for requests.
-# The main application will use this TCP port to listen for requests from APT, and
-# for uploads to other peers. If a port is not specified for the DHT, it will also
-# use this UDP port to listen for DHT requests.
-PORT = 9977
-
-# Directory to store the downloaded files in
-CACHE_DIR = /var/cache/apt-p2p
-
-# Other directories containing packages to share with others
-# WARNING: all files in these directories will be hashed and available
-# for everybody to download
-# OTHER_DIRS = 
-
-# Whether it's OK to use an IP address from a known local/private range
-LOCAL_OK = no
-
-# Unload the packages cache after an interval of inactivity this long.
-# The packages cache uses a lot of memory, and only takes a few seconds
-# to reload when a new request arrives.
-UNLOAD_PACKAGES_CACHE = 5m
-
-# Refresh the DHT keys after this much time has passed.
-# This should be a time slightly less than the DHT's KEY_EXPIRE value.
-KEY_REFRESH = 57m
-
-# Which DHT implementation to use.
-# It must be possible to do "from <DHT>.DHT import DHT" to get a class that
-# implements the IDHT interface. There should also be a similarly named
-# section below to specify the options for the DHT.
-DHT = apt_p2p_Khashmir
-
-# Whether to only run the DHT (for providing only a bootstrap node)
-DHT-ONLY = no
-
-####################### apt_p2p_Khashmir ############################
-# This is the default (included) DHT to use.
-[apt_p2p_Khashmir]
-
-# To specify a different (UDP) port for the DHT to use.
-# If not specified here, the PORT value in the DEFAULT section will be used.
-# PORT = 
-
-# bootstrap nodes to contact to join the DHT
-BOOTSTRAP = www.camrdale.org:9977
-    steveholt.hopto.org:9976
-
-# whether this node is a bootstrap node
-BOOTSTRAP_NODE = no
-
-# Kademlia "K" constant, this should be an even number
-K = 8
-
-# SHA1 is 160 bits long
-HASH_LENGTH = 160
-
-# interval between saving the running state
-CHECKPOINT_INTERVAL = 5m
-
-# concurrent number of calls per find node/value request!
-CONCURRENT_REQS = 4
-
-# how many hosts to post values to
-STORE_REDUNDANCY = 3
-
-# How many values to attempt to retrieve from the DHT.
-# Setting this to 0 will try and get all values (which could take a while if
-# a lot of nodes have values). Setting it negative will try to get that
-# number of results from only the closest STORE_REDUNDANCY nodes to the hash.
-# The default is a large negative number so all values from the closest -# STORE_REDUNDANCY nodes will be retrieved. -RETRIEVE_VALUES = -10000 - -# how many times in a row a node can fail to respond before it's booted from the routing table -MAX_FAILURES = 3 - -# never ping a node more often than this -MIN_PING_INTERVAL = 15m - -# refresh buckets that haven't been touched in this long -BUCKET_STALENESS = 1h - -# expire unrefreshed entries older than this -KEY_EXPIRE = 1h - -# whether to spew info about the requests/responses in the protocol -SPEW = no diff --git a/apt-dht.py b/apt-dht.py deleted file mode 100644 index 6873204..0000000 --- a/apt-dht.py +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env python - -# Load apt-p2p application -# -# There are two ways apt-p2p can be started: -# 1. twistd -y apt-p2p -# - twistd will load this file and execute the app -# in 'application' variable -# 2. from command line -# - __name__ will be '__main__' - -import pwd,sys - -from twisted.application import service, internet, app, strports -from twisted.internet import reactor -from twisted.python import usage, log -from twisted.web2 import channel - -from apt_p2p.apt_p2p_conf import config, version, DEFAULT_CONFIG_FILES -from apt_p2p.interfaces import IDHT - -config_file = '' - -if __name__ == '__main__': - # Parse command line parameters when started on command line - class AptP2POptions(usage.Options): - optFlags = [ - ['help', 'h', 'Print this help message'], - ] - optParameters = [ - ['config-file', 'c', '', "Configuration file"], - ['log-file', 'l', '-', "File to log to, - for stdout"], - ] - longdesc="apt-p2p is a peer-to-peer downloader for apt users" - def opt_version(self): - print "apt-p2p %s" % version.short() - sys.exit(0) - - opts = AptP2POptions() - try: - opts.parseOptions() - except usage.UsageError, ue: - print '%s: %s' % (sys.argv[0], ue) - sys.exit(1) - - config_file = opts.opts['config-file'] - log_file = opts.opts['log-file'] - if log_file == '-': - f = sys.stdout - else: - f = open(log_file, 'w') - log.startLogging(f, setStdout=1) - -log.msg("Loading config files: '%s'" % "', '".join(DEFAULT_CONFIG_FILES + [config_file])) -config_read = config.read(DEFAULT_CONFIG_FILES + [config_file]) -log.msg("Successfully loaded config files: '%s'" % "', '".join(config_read)) -if config.has_option('DEFAULT', 'username') and config.get('DEFAULT', 'username'): - uid,gid = pwd.getpwnam(config.get('DEFAULT', 'username'))[2:4] -else: - uid,gid = None,None - -log.msg('Starting application') -application = service.Application("apt-p2p", uid, gid) -#print service.IProcess(application).processName -#service.IProcess(application).processName = 'apt-p2p' - -log.msg('Starting DHT') -DHT = __import__(config.get('DEFAULT', 'DHT')+'.DHT', globals(), locals(), ['DHT']) -assert IDHT.implementedBy(DHT.DHT), "You must provide a DHT implementation that implements the IDHT interface." 
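The __import__ call and interface assertion above are the plugin hook that makes the DHT implementation swappable through the DHT config value. A minimal sketch of the same pattern, with a hypothetical stand-in for the IDHT interface (the real one lives in apt_p2p/interfaces.py and is not shown in this diff):

```python
from zope.interface import Interface

class IDHT(Interface):
    """Stand-in marker interface; the real IDHT declares the DHT methods."""

def load_dht_class(module_name):
    # Mirrors: __import__(config.get('DEFAULT', 'DHT') + '.DHT', ...)
    # i.e. import <module_name>.DHT and pull the DHT class out of it.
    mod = __import__(module_name + '.DHT', globals(), locals(), ['DHT'])
    if not IDHT.implementedBy(mod.DHT):
        raise TypeError("%s.DHT does not implement IDHT" % module_name)
    return mod.DHT
```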
-myDHT = DHT.DHT()
-
-if not config.getboolean('DEFAULT', 'DHT-only'):
-    log.msg('Starting main application server')
-    from apt_p2p.apt_p2p import AptP2P
-    myapp = AptP2P(myDHT)
-    factory = myapp.getHTTPFactory()
-    s = strports.service('tcp:'+config.get('DEFAULT', 'port'), factory)
-    s.setServiceParent(application)
-else:
-    myDHT.loadConfig(config, config.get('DEFAULT', 'DHT'))
-    myDHT.join()
-
-if __name__ == '__main__':
-    # Run on command line
-    service.IServiceCollection(application).privilegedStartService()
-    service.IServiceCollection(application).startService()
-    reactor.run()
diff --git a/apt-p2p.conf b/apt-p2p.conf
new file mode 100644
index 0000000..9a12eed
--- /dev/null
+++ b/apt-p2p.conf
@@ -0,0 +1,101 @@
+# The apt-p2p configuration file.
+#
+# This is an ini-type configuration file, using sections identified by
+# square brackets. Values are specified on a single line using the '='
+# sign. Some values indicate times, in which case a suffix of 'd' for
+# days, 'h' for hours, 'm' for minutes, and 's' for seconds can be used.
+# Some values can span multiple lines by starting the subsequent lines
+# with one or more spaces.
+#
+######################### DEFAULT ###################################
+# This is the default section containing the configuration options for the
+# main application.
+[DEFAULT]
+
+# The number of the port to listen on for requests.
+# The main application will use this TCP port to listen for requests from APT, and
+# for uploads to other peers. If a port is not specified for the DHT, it will also
+# use this UDP port to listen for DHT requests.
+PORT = 9977
+
+# Directory to store the downloaded files in
+CACHE_DIR = /var/cache/apt-p2p
+
+# Other directories containing packages to share with others
+# WARNING: all files in these directories will be hashed and available
+# for everybody to download
+# OTHER_DIRS = 
+
+# Whether it's OK to use an IP address from a known local/private range
+LOCAL_OK = no
+
+# Unload the packages cache after an interval of inactivity this long.
+# The packages cache uses a lot of memory, and only takes a few seconds
+# to reload when a new request arrives.
+UNLOAD_PACKAGES_CACHE = 5m
+
+# Refresh the DHT keys after this much time has passed.
+# This should be a time slightly less than the DHT's KEY_EXPIRE value.
+KEY_REFRESH = 57m
+
+# Which DHT implementation to use.
+# It must be possible to do "from <DHT>.DHT import DHT" to get a class that
+# implements the IDHT interface. There should also be a similarly named
+# section below to specify the options for the DHT.
+DHT = apt_p2p_Khashmir
+
+# Whether to only run the DHT (for providing only a bootstrap node)
+DHT-ONLY = no
+
+####################### apt_p2p_Khashmir ############################
+# This is the default (included) DHT to use.
+[apt_p2p_Khashmir]
+
+# To specify a different (UDP) port for the DHT to use.
+# If not specified here, the PORT value in the DEFAULT section will be used.
+# PORT = 
+
+# bootstrap nodes to contact to join the DHT
+BOOTSTRAP = www.camrdale.org:9977
+    steveholt.hopto.org:9976
+
+# whether this node is a bootstrap node
+BOOTSTRAP_NODE = no
+
+# Kademlia "K" constant, this should be an even number
+K = 8
+
+# SHA1 is 160 bits long
+HASH_LENGTH = 160
+
+# interval between saving the running state
+CHECKPOINT_INTERVAL = 5m
+
+# concurrent number of calls per find node/value request!
+CONCURRENT_REQS = 4
+
+# how many hosts to post values to
+STORE_REDUNDANCY = 3
+
+# How many values to attempt to retrieve from the DHT.
+# Setting this to 0 will try and get all values (which could take a while if +# a lot of nodes have values). Setting it negative will try to get that +# number of results from only the closest STORE_REDUNDANCY nodes to the hash. +# The default is a large negative number so all values from the closest +# STORE_REDUNDANCY nodes will be retrieved. +RETRIEVE_VALUES = -10000 + +# how many times in a row a node can fail to respond before it's booted from the routing table +MAX_FAILURES = 3 + +# never ping a node more often than this +MIN_PING_INTERVAL = 15m + +# refresh buckets that haven't been touched in this long +BUCKET_STALENESS = 1h + +# expire unrefreshed entries older than this +KEY_EXPIRE = 1h + +# whether to spew info about the requests/responses in the protocol +SPEW = no diff --git a/apt-p2p.py b/apt-p2p.py new file mode 100644 index 0000000..6873204 --- /dev/null +++ b/apt-p2p.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python + +# Load apt-p2p application +# +# There are two ways apt-p2p can be started: +# 1. twistd -y apt-p2p +# - twistd will load this file and execute the app +# in 'application' variable +# 2. from command line +# - __name__ will be '__main__' + +import pwd,sys + +from twisted.application import service, internet, app, strports +from twisted.internet import reactor +from twisted.python import usage, log +from twisted.web2 import channel + +from apt_p2p.apt_p2p_conf import config, version, DEFAULT_CONFIG_FILES +from apt_p2p.interfaces import IDHT + +config_file = '' + +if __name__ == '__main__': + # Parse command line parameters when started on command line + class AptP2POptions(usage.Options): + optFlags = [ + ['help', 'h', 'Print this help message'], + ] + optParameters = [ + ['config-file', 'c', '', "Configuration file"], + ['log-file', 'l', '-', "File to log to, - for stdout"], + ] + longdesc="apt-p2p is a peer-to-peer downloader for apt users" + def opt_version(self): + print "apt-p2p %s" % version.short() + sys.exit(0) + + opts = AptP2POptions() + try: + opts.parseOptions() + except usage.UsageError, ue: + print '%s: %s' % (sys.argv[0], ue) + sys.exit(1) + + config_file = opts.opts['config-file'] + log_file = opts.opts['log-file'] + if log_file == '-': + f = sys.stdout + else: + f = open(log_file, 'w') + log.startLogging(f, setStdout=1) + +log.msg("Loading config files: '%s'" % "', '".join(DEFAULT_CONFIG_FILES + [config_file])) +config_read = config.read(DEFAULT_CONFIG_FILES + [config_file]) +log.msg("Successfully loaded config files: '%s'" % "', '".join(config_read)) +if config.has_option('DEFAULT', 'username') and config.get('DEFAULT', 'username'): + uid,gid = pwd.getpwnam(config.get('DEFAULT', 'username'))[2:4] +else: + uid,gid = None,None + +log.msg('Starting application') +application = service.Application("apt-p2p", uid, gid) +#print service.IProcess(application).processName +#service.IProcess(application).processName = 'apt-p2p' + +log.msg('Starting DHT') +DHT = __import__(config.get('DEFAULT', 'DHT')+'.DHT', globals(), locals(), ['DHT']) +assert IDHT.implementedBy(DHT.DHT), "You must provide a DHT implementation that implements the IDHT interface." 
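The configuration file above writes durations with the d/h/m/s suffixes described in its header comment. apt-p2p's real parser lives in apt_p2p/apt_p2p_conf.py, which is not part of this diff; a minimal sketch of the idea, under that assumption, might be:

```python
import re

# Seconds per suffix, per the comment at the top of apt-p2p.conf.
TIME_UNITS = {'s': 1, 'm': 60, 'h': 3600, 'd': 86400}

def parse_time(value):
    """Convert '57m', '1h', '30s' (or a bare number of seconds) to seconds."""
    match = re.match(r'^\s*(\d+)\s*([smhd]?)\s*$', value)
    if match is None:
        raise ValueError('not a time value: %r' % value)
    number, suffix = match.groups()
    return int(number) * TIME_UNITS.get(suffix, 1)

assert parse_time('57m') == 3420   # KEY_REFRESH
assert parse_time('1h') == 3600    # KEY_EXPIRE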
+myDHT = DHT.DHT() + +if not config.getboolean('DEFAULT', 'DHT-only'): + log.msg('Starting main application server') + from apt_p2p.apt_p2p import AptP2P + myapp = AptP2P(myDHT) + factory = myapp.getHTTPFactory() + s = strports.service('tcp:'+config.get('DEFAULT', 'port'), factory) + s.setServiceParent(application) +else: + myDHT.loadConfig(config, config.get('DEFAULT', 'DHT')) + myDHT.join() + +if __name__ == '__main__': + # Run on command line + service.IServiceCollection(application).privilegedStartService() + service.IServiceCollection(application).startService() + reactor.run() diff --git a/apt_dht/AptPackages.py b/apt_dht/AptPackages.py deleted file mode 100644 index 44c84b5..0000000 --- a/apt_dht/AptPackages.py +++ /dev/null @@ -1,625 +0,0 @@ -# -# Copyright (C) 2002 Manuel Estrada Sainz -# Copyright (C) 2008 Cameron Dale -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of version 2.1 of the GNU General Public -# License as published by the Free Software Foundation. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -"""Manage a mirror's index files. - -@type TRACKED_FILES: C{list} of C{string} -@var TRACKED_FILES: the file names of files that contain index information -""" - -# Disable the FutureWarning from the apt module -import warnings -warnings.simplefilter("ignore", FutureWarning) - -import os, shelve -from random import choice -from shutil import rmtree -from copy import deepcopy -from UserDict import DictMixin - -from twisted.internet import threads, defer, reactor -from twisted.python import log -from twisted.python.filepath import FilePath -from twisted.trial import unittest - -import apt_pkg, apt_inst -from apt import OpProgress -from debian_bundle import deb822 - -from Hash import HashObject - -apt_pkg.init() - -TRACKED_FILES = ['release', 'sources', 'packages'] - -class PackageFileList(DictMixin): - """Manages a list of index files belonging to a mirror. - - @type cache_dir: L{twisted.python.filepath.FilePath} - @ivar cache_dir: the directory to use for storing all files - @type packages: C{shelve dictionary} - @ivar packages: the files tracked for this mirror - """ - - def __init__(self, cache_dir): - """Initialize the list by opening the dictionary.""" - self.cache_dir = cache_dir - self.cache_dir.restat(False) - if not self.cache_dir.exists(): - self.cache_dir.makedirs() - self.packages = None - self.open() - - def open(self): - """Open the persistent dictionary of files for this mirror.""" - if self.packages is None: - self.packages = shelve.open(self.cache_dir.child('packages.db').path) - - def close(self): - """Close the persistent dictionary.""" - if self.packages is not None: - self.packages.close() - - def update_file(self, cache_path, file_path): - """Check if an updated file needs to be tracked. - - Called from the mirror manager when files get updated so we can update our - fake lists and sources.list. 
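PackageFileList above persists its mapping of mirror paths to local files with the standard shelve module, so the tracked index files survive a restart. A tiny, self-contained illustration of that persistence (the database path and keys here are hypothetical):

```python
import shelve

db = shelve.open('/tmp/apt-p2p-packages.db')
db['dists/stable/main/binary-i386/Packages'] = '/var/cache/apt-p2p/mirror/Packages'
db.sync()                 # flushed to disk; still there after a restart
print(list(db.keys()))
db.close()
```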
- - @type cache_path: C{string} - @param cache_path: the location of the file within the mirror - @type file_path: L{twisted.python.filepath.FilePath} - @param file_path: The location of the file in the file system - @rtype: C{boolean} - @return: whether the file is an index file - """ - filename = cache_path.split('/')[-1] - if filename.lower() in TRACKED_FILES: - log.msg("Registering package file: "+cache_path) - self.packages[cache_path] = file_path - return True - return False - - def check_files(self): - """Check all files in the database to remove any that don't exist.""" - files = self.packages.keys() - for f in files: - self.packages[f].restat(False) - if not self.packages[f].exists(): - log.msg("File in packages database has been deleted: "+f) - del self.packages[f] - - #{ Dictionary interface details - def __getitem__(self, key): return self.packages[key] - def __setitem__(self, key, item): self.packages[key] = item - def __delitem__(self, key): del self.packages[key] - def keys(self): return self.packages.keys() - -class AptPackages: - """Answers queries about packages available from a mirror. - - Uses the python-apt tools to parse and provide information about the - files that are available on a single mirror. - - @ivar DEFAULT_APT_CONFIG: the default configuration parameters to use for apt - @ivar essential_dirs: directories that must be created for apt to work - @ivar essential_files: files that must be created for apt to work - @type cache_dir: L{twisted.python.filepath.FilePath} - @ivar cache_dir: the directory to use for storing all files - @type unload_delay: C{int} - @ivar unload_delay: the time to wait before unloading the apt cache - @ivar apt_config: the configuration parameters to use for apt - @type packages: L{PackageFileList} - @ivar packages: the persistent storage of tracked apt index files - @type loaded: C{boolean} - @ivar loaded: whether the apt cache is currently loaded - @type loading: L{twisted.internet.defer.Deferred} - @ivar loading: if the cache is currently being loaded, this will be - called when it is loaded, otherwise it is None - @type unload_later: L{twisted.internet.interfaces.IDelayedCall} - @ivar unload_later: the delayed call to unload the apt cache - @type indexrecords: C{dictionary} - @ivar indexrecords: the hashes of index files for the mirror, keys are - mirror directories, values are dictionaries with keys the path to the - index file in the mirror directory and values are dictionaries with - keys the hash type and values the hash - @type cache: C{apt_pkg.GetCache()} - @ivar cache: the apt cache of the mirror - @type records: C{apt_pkg.GetPkgRecords()} - @ivar records: the apt package records for all binary packages in a mirror - @type srcrecords: C{apt_pkg.GetPkgSrcRecords} - @ivar srcrecords: the apt package records for all source packages in a mirror - """ - - DEFAULT_APT_CONFIG = { - #'APT' : '', - #'APT::Architecture' : 'i386', # Commented so the machine's config will set this - #'APT::Default-Release' : 'unstable', - 'Dir':'.', # / - 'Dir::State' : 'apt/', # var/lib/apt/ - 'Dir::State::Lists': 'lists/', # lists/ - #'Dir::State::cdroms' : 'cdroms.list', - 'Dir::State::userstatus' : 'status.user', - 'Dir::State::status': 'dpkg/status', # '/var/lib/dpkg/status' - 'Dir::Cache' : '.apt/cache/', # var/cache/apt/ - #'Dir::Cache::archives' : 'archives/', - 'Dir::Cache::srcpkgcache' : 'srcpkgcache.bin', - 'Dir::Cache::pkgcache' : 'pkgcache.bin', - 'Dir::Etc' : 'apt/etc/', # etc/apt/ - 'Dir::Etc::sourcelist' : 'sources.list', - 
'Dir::Etc::vendorlist' : 'vendors.list', - 'Dir::Etc::vendorparts' : 'vendors.list.d', - #'Dir::Etc::main' : 'apt.conf', - #'Dir::Etc::parts' : 'apt.conf.d', - #'Dir::Etc::preferences' : 'preferences', - 'Dir::Bin' : '', - #'Dir::Bin::methods' : '', #'/usr/lib/apt/methods' - 'Dir::Bin::dpkg' : '/usr/bin/dpkg', - #'DPkg' : '', - #'DPkg::Pre-Install-Pkgs' : '', - #'DPkg::Tools' : '', - #'DPkg::Tools::Options' : '', - #'DPkg::Tools::Options::/usr/bin/apt-listchanges' : '', - #'DPkg::Tools::Options::/usr/bin/apt-listchanges::Version' : '2', - #'DPkg::Post-Invoke' : '', - } - essential_dirs = ('apt', 'apt/cache', 'apt/dpkg', 'apt/etc', 'apt/lists', - 'apt/lists/partial') - essential_files = ('apt/dpkg/status', 'apt/etc/sources.list',) - - def __init__(self, cache_dir, unload_delay): - """Construct a new packages manager. - - @param cache_dir: directory to use to store files for this mirror - """ - self.cache_dir = cache_dir - self.unload_delay = unload_delay - self.apt_config = deepcopy(self.DEFAULT_APT_CONFIG) - - # Create the necessary files and directories for apt - for dir in self.essential_dirs: - path = self.cache_dir.preauthChild(dir) - if not path.exists(): - path.makedirs() - for file in self.essential_files: - path = self.cache_dir.preauthChild(file) - if not path.exists(): - path.touch() - - self.apt_config['Dir'] = self.cache_dir.path - self.apt_config['Dir::State::status'] = self.cache_dir.preauthChild(self.apt_config['Dir::State']).preauthChild(self.apt_config['Dir::State::status']).path - self.packages = PackageFileList(cache_dir) - self.loaded = False - self.loading = None - self.unload_later = None - - def __del__(self): - self.cleanup() - - def addRelease(self, cache_path, file_path): - """Add a Release file's info to the list of index files. - - Dirty hack until python-apt supports apt-pkg/indexrecords.h - (see Bug #456141) - """ - self.indexrecords[cache_path] = {} - - read_packages = False - f = file_path.open('r') - - # Use python-debian routines to parse the file for hashes - rel = deb822.Release(f, fields = ['MD5Sum', 'SHA1', 'SHA256']) - for hash_type in rel: - for file in rel[hash_type]: - self.indexrecords[cache_path].setdefault(file['name'], {})[hash_type.upper()] = (file[hash_type], file['size']) - - f.close() - - def file_updated(self, cache_path, file_path): - """A file in the mirror has changed or been added. - - If this affects us, unload our apt database. - @see: L{PackageFileList.update_file} - """ - if self.packages.update_file(cache_path, file_path): - self.unload() - - def load(self): - """Make sure the package cache is initialized and loaded.""" - # Reset the pending unload call - if self.unload_later and self.unload_later.active(): - self.unload_later.reset(self.unload_delay) - else: - self.unload_later = reactor.callLater(self.unload_delay, self.unload) - - # Make sure it's not already being loaded - if self.loading is None: - log.msg('Loading the packages cache') - self.loading = threads.deferToThread(self._load) - self.loading.addCallback(self.doneLoading) - return self.loading - - def doneLoading(self, loadResult): - """Cache is loaded.""" - self.loading = None - # Must pass on the result for the next callback - return loadResult - - def _load(self): - """Regenerates the fake configuration and loads the packages caches.""" - if self.loaded: return True - - # Modify the default configuration to create the fake one. 
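addRelease() above leans on python-debian's deb822 module to pull the per-file hashes out of a Release file. Run in isolation, the structure it builds looks roughly like this (the list-file path is hypothetical, and the 2008-era import was from debian_bundle rather than debian):

```python
from debian import deb822   # 'from debian_bundle import deb822' back then

indexrecords = {}
f = open('/var/lib/apt/lists/example_dists_stable_Release')
rel = deb822.Release(f, fields=['MD5Sum', 'SHA1', 'SHA256'])
for hash_type in rel:
    for entry in rel[hash_type]:
        # e.g. indexrecords['main/binary-i386/Packages.bz2']['SHA1']
        #        = ('<hex digest>', '<size>')
        indexrecords.setdefault(entry['name'], {})[hash_type.upper()] = (
            entry[hash_type], entry['size'])
f.close()
```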
- apt_pkg.InitSystem() - self.cache_dir.preauthChild(self.apt_config['Dir::State'] - ).preauthChild(self.apt_config['Dir::State::Lists']).remove() - self.cache_dir.preauthChild(self.apt_config['Dir::State'] - ).preauthChild(self.apt_config['Dir::State::Lists'] - ).child('partial').makedirs() - sources_file = self.cache_dir.preauthChild(self.apt_config['Dir::Etc'] - ).preauthChild(self.apt_config['Dir::Etc::sourcelist']) - sources = sources_file.open('w') - sources_count = 0 - deb_src_added = False - self.packages.check_files() - self.indexrecords = {} - - # Create an entry in sources.list for each needed index file - for f in self.packages: - # we should probably clear old entries from self.packages and - # take into account the recorded mtime as optimization - file = self.packages[f] - if f.split('/')[-1] == "Release": - self.addRelease(f, file) - fake_uri='http://apt-p2p'+f - fake_dirname = '/'.join(fake_uri.split('/')[:-1]) - if f.endswith('Sources'): - deb_src_added = True - source_line='deb-src '+fake_dirname+'/ /' - else: - source_line='deb '+fake_dirname+'/ /' - listpath = self.cache_dir.preauthChild(self.apt_config['Dir::State'] - ).preauthChild(self.apt_config['Dir::State::Lists'] - ).child(apt_pkg.URItoFileName(fake_uri)) - sources.write(source_line+'\n') - log.msg("Sources line: " + source_line) - sources_count = sources_count + 1 - - if listpath.exists(): - #we should empty the directory instead - listpath.remove() - os.symlink(file.path, listpath.path) - sources.close() - - if sources_count == 0: - log.msg("No Packages files available for %s backend"%(self.cache_dir.path)) - return False - - log.msg("Loading Packages database for "+self.cache_dir.path) - for key, value in self.apt_config.items(): - apt_pkg.Config[key] = value - - self.cache = apt_pkg.GetCache(OpProgress()) - self.records = apt_pkg.GetPkgRecords(self.cache) - if deb_src_added: - self.srcrecords = apt_pkg.GetPkgSrcRecords() - else: - self.srcrecords = None - - self.loaded = True - return True - - def unload(self): - """Tries to make the packages server quit.""" - if self.unload_later and self.unload_later.active(): - self.unload_later.cancel() - self.unload_later = None - if self.loaded: - log.msg('Unloading the packages cache') - # This should save memory - del self.cache - del self.records - del self.srcrecords - del self.indexrecords - self.loaded = False - - def cleanup(self): - """Cleanup and close any loaded caches.""" - self.unload() - if self.unload_later and self.unload_later.active(): - self.unload_later.cancel() - self.packages.close() - - def findHash(self, path): - """Find the hash for a given path in this mirror. - - @type path: C{string} - @param path: the path within the mirror of the file to lookup - @rtype: L{twisted.internet.defer.Deferred} - @return: a deferred so it can make sure the cache is loaded first - """ - d = defer.Deferred() - - deferLoad = self.load() - deferLoad.addCallback(self._findHash, path, d) - deferLoad.addErrback(self._findHash_error, path, d) - - return d - - def _findHash_error(self, failure, path, d): - """An error occurred, return an empty hash.""" - log.msg('An error occurred while looking up a hash for: %s' % path) - log.err(failure) - d.callback(HashObject()) - return failure - - def _findHash(self, loadResult, path, d): - """Search the records for the hash of a path. 
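The fake http://apt-p2p/... URIs written to sources.list in _load() above become list filenames via apt_pkg.URItoFileName(), the same convention apt uses under /var/lib/apt/lists/. A sketch (the exact output shown is an assumption; current python-apt spells the call apt_pkg.uri_to_filename):

```python
import apt_pkg

fake_uri = 'http://apt-p2p/dists/stable/main/binary-i386/Packages'
# Expected result (assumption, based on apt's list-file naming):
#   apt-p2p_dists_stable_main_binary-i386_Packages
print(apt_pkg.URItoFileName(fake_uri))
```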
- - @type loadResult: C{boolean} - @param loadResult: whether apt's cache was successfully loaded - @type path: C{string} - @param path: the path within the mirror of the file to lookup - @type d: L{twisted.internet.defer.Deferred} - @param d: the deferred to callback with the result - """ - if not loadResult: - d.callback(HashObject()) - return loadResult - - h = HashObject() - - # First look for the path in the cache of index files - for release in self.indexrecords: - if path.startswith(release[:-7]): - for indexFile in self.indexrecords[release]: - if release[:-7] + indexFile == path: - h.setFromIndexRecord(self.indexrecords[release][indexFile]) - d.callback(h) - return loadResult - - package = path.split('/')[-1].split('_')[0] - - # Check the binary packages - try: - for version in self.cache[package].VersionList: - size = version.Size - for verFile in version.FileList: - if self.records.Lookup(verFile): - if '/' + self.records.FileName == path: - h.setFromPkgRecord(self.records, size) - d.callback(h) - return loadResult - except KeyError: - pass - - # Check the source packages' files - if self.srcrecords: - self.srcrecords.Restart() - if self.srcrecords.Lookup(package): - for f in self.srcrecords.Files: - if path == '/' + f[2]: - h.setFromSrcRecord(f) - d.callback(h) - return loadResult - - d.callback(h) - - # Have to pass the returned loadResult on in case other calls to this function are pending. - return loadResult - -class TestAptPackages(unittest.TestCase): - """Unit tests for the AptPackages cache.""" - - pending_calls = [] - client = None - timeout = 10 - packagesFile = '' - sourcesFile = '' - releaseFile = '' - - def setUp(self): - """Initializes the cache with files found in the traditional apt location.""" - self.client = AptPackages(FilePath('/tmp/.apt-p2p'), 300) - - # Find the largest index files that are for 'main' - self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n') - self.sourcesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').read().rstrip('\n') - - # Find the Release file corresponding to the found Packages file - for f in os.walk('/var/lib/apt/lists').next()[2]: - if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]): - self.releaseFile = f - break - - # Add all the found files to the PackageFileList - self.client.file_updated(self.releaseFile[self.releaseFile.find('_dists_'):].replace('_','/'), - FilePath('/var/lib/apt/lists/' + self.releaseFile)) - self.client.file_updated(self.packagesFile[self.packagesFile.find('_dists_'):].replace('_','/'), - FilePath('/var/lib/apt/lists/' + self.packagesFile)) - self.client.file_updated(self.sourcesFile[self.sourcesFile.find('_dists_'):].replace('_','/'), - FilePath('/var/lib/apt/lists/' + self.sourcesFile)) - - def test_pkg_hash(self): - """Tests loading the binary package records cache.""" - self.client._load() - - self.client.records.Lookup(self.client.cache['dpkg'].VersionList[0].FileList[0]) - - pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' + - '/var/lib/apt/lists/' + self.packagesFile + - ' | grep -E "^SHA1:" | head -n 1' + - ' | cut -d\ -f 2').read().rstrip('\n') - - self.failUnless(self.client.records.SHA1Hash == pkg_hash, - "Hashes don't match: %s != %s" % (self.client.records.SHA1Hash, pkg_hash)) - - def test_src_hash(self): - """Tests loading the source package records cache.""" - self.client._load() - - self.client.srcrecords.Lookup('dpkg') - - src_hashes = os.popen('grep -A 20 -E 
"^Package: dpkg$" ' + - '/var/lib/apt/lists/' + self.sourcesFile + - ' | grep -A 4 -E "^Files:" | grep -E "^ " ' + - ' | cut -d\ -f 2').read().split('\n')[:-1] - - for f in self.client.srcrecords.Files: - self.failUnless(f[0] in src_hashes, "Couldn't find %s in: %r" % (f[0], src_hashes)) - - def test_index_hash(self): - """Tests loading the cache of index file information.""" - self.client._load() - - indexhash = self.client.indexrecords[self.releaseFile[self.releaseFile.find('_dists_'):].replace('_','/')]['main/binary-i386/Packages.bz2']['SHA1'][0] - - idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' + - '/var/lib/apt/lists/' + self.releaseFile + - ' | grep -E " main/binary-i386/Packages.bz2$"' - ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n') - - self.failUnless(indexhash == idx_hash, "Hashes don't match: %s != %s" % (indexhash, idx_hash)) - - def verifyHash(self, found_hash, path, true_hash): - self.failUnless(found_hash.hexexpected() == true_hash, - "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash)) - - def test_findIndexHash(self): - """Tests finding the hash of a single index file.""" - lastDefer = defer.Deferred() - - idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' + - '/var/lib/apt/lists/' + self.releaseFile + - ' | grep -E " main/binary-i386/Packages.bz2$"' - ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n') - idx_path = '/' + self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2' - - d = self.client.findHash(idx_path) - d.addCallback(self.verifyHash, idx_path, idx_hash) - - d.addBoth(lastDefer.callback) - return lastDefer - - def test_findPkgHash(self): - """Tests finding the hash of a single binary package.""" - lastDefer = defer.Deferred() - - pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' + - '/var/lib/apt/lists/' + self.packagesFile + - ' | grep -E "^SHA1:" | head -n 1' + - ' | cut -d\ -f 2').read().rstrip('\n') - pkg_path = '/' + os.popen('grep -A 30 -E "^Package: dpkg$" ' + - '/var/lib/apt/lists/' + self.packagesFile + - ' | grep -E "^Filename:" | head -n 1' + - ' | cut -d\ -f 2').read().rstrip('\n') - - d = self.client.findHash(pkg_path) - d.addCallback(self.verifyHash, pkg_path, pkg_hash) - - d.addBoth(lastDefer.callback) - return lastDefer - - def test_findSrcHash(self): - """Tests finding the hash of a single source package.""" - lastDefer = defer.Deferred() - - src_dir = '/' + os.popen('grep -A 30 -E "^Package: dpkg$" ' + - '/var/lib/apt/lists/' + self.sourcesFile + - ' | grep -E "^Directory:" | head -n 1' + - ' | cut -d\ -f 2').read().rstrip('\n') - src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' + - '/var/lib/apt/lists/' + self.sourcesFile + - ' | grep -A 4 -E "^Files:" | grep -E "^ " ' + - ' | cut -d\ -f 2').read().split('\n')[:-1] - src_paths = os.popen('grep -A 20 -E "^Package: dpkg$" ' + - '/var/lib/apt/lists/' + self.sourcesFile + - ' | grep -A 4 -E "^Files:" | grep -E "^ " ' + - ' | cut -d\ -f 4').read().split('\n')[:-1] - - i = choice(range(len(src_hashes))) - d = self.client.findHash(src_dir + '/' + src_paths[i]) - d.addCallback(self.verifyHash, src_dir + '/' + src_paths[i], src_hashes[i]) - - d.addBoth(lastDefer.callback) - return lastDefer - - def test_multipleFindHash(self): - """Tests finding the hash of an index file, binary package, source package, and another index file.""" - lastDefer = defer.Deferred() - - # Lookup a Packages.bz2 file - idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' + - '/var/lib/apt/lists/' + self.releaseFile + - ' | grep -E " 
main/binary-i386/Packages.bz2$"' - ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n') - idx_path = '/' + self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2' - - d = self.client.findHash(idx_path) - d.addCallback(self.verifyHash, idx_path, idx_hash) - - # Lookup the binary 'dpkg' package - pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' + - '/var/lib/apt/lists/' + self.packagesFile + - ' | grep -E "^SHA1:" | head -n 1' + - ' | cut -d\ -f 2').read().rstrip('\n') - pkg_path = '/' + os.popen('grep -A 30 -E "^Package: dpkg$" ' + - '/var/lib/apt/lists/' + self.packagesFile + - ' | grep -E "^Filename:" | head -n 1' + - ' | cut -d\ -f 2').read().rstrip('\n') - - d = self.client.findHash(pkg_path) - d.addCallback(self.verifyHash, pkg_path, pkg_hash) - - # Lookup the source 'dpkg' package - src_dir = '/' + os.popen('grep -A 30 -E "^Package: dpkg$" ' + - '/var/lib/apt/lists/' + self.sourcesFile + - ' | grep -E "^Directory:" | head -n 1' + - ' | cut -d\ -f 2').read().rstrip('\n') - src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' + - '/var/lib/apt/lists/' + self.sourcesFile + - ' | grep -A 4 -E "^Files:" | grep -E "^ " ' + - ' | cut -d\ -f 2').read().split('\n')[:-1] - src_paths = os.popen('grep -A 20 -E "^Package: dpkg$" ' + - '/var/lib/apt/lists/' + self.sourcesFile + - ' | grep -A 4 -E "^Files:" | grep -E "^ " ' + - ' | cut -d\ -f 4').read().split('\n')[:-1] - - for i in range(len(src_hashes)): - d = self.client.findHash(src_dir + '/' + src_paths[i]) - d.addCallback(self.verifyHash, src_dir + '/' + src_paths[i], src_hashes[i]) - - # Lookup a Sources.bz2 file - idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' + - '/var/lib/apt/lists/' + self.releaseFile + - ' | grep -E " main/source/Sources.bz2$"' - ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n') - idx_path = '/' + self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')[:-7] + 'main/source/Sources.bz2' - - d = self.client.findHash(idx_path) - d.addCallback(self.verifyHash, idx_path, idx_hash) - - d.addBoth(lastDefer.callback) - return lastDefer - - def tearDown(self): - for p in self.pending_calls: - if p.active(): - p.cancel() - self.pending_calls = [] - self.client.cleanup() - self.client = None diff --git a/apt_dht/CacheManager.py b/apt_dht/CacheManager.py deleted file mode 100644 index ccf13c5..0000000 --- a/apt_dht/CacheManager.py +++ /dev/null @@ -1,440 +0,0 @@ - -"""Manage a cache of downloaded files. - -@var DECOMPRESS_EXTS: a list of file extensions that need to be decompressed -@var DECOMPRESS_FILES: a list of file names that need to be decompressed -""" - -from bz2 import BZ2Decompressor -from zlib import decompressobj, MAX_WBITS -from gzip import FCOMMENT, FEXTRA, FHCRC, FNAME, FTEXT -from urlparse import urlparse -import os - -from twisted.python import log -from twisted.python.filepath import FilePath -from twisted.internet import defer, reactor -from twisted.trial import unittest -from twisted.web2 import stream -from twisted.web2.http import splitHostPort - -from Hash import HashObject - -DECOMPRESS_EXTS = ['.gz', '.bz2'] -DECOMPRESS_FILES = ['release', 'sources', 'packages'] - -class ProxyFileStream(stream.SimpleStream): - """Saves a stream to a file while providing a new stream. - - Also optionally decompresses the file while it is being downloaded. 
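Only recognized index files are decompressed on the fly; everything else is streamed through untouched. The decision reduces to a small predicate over DECOMPRESS_FILES and DECOMPRESS_EXTS, the same check save_file() applies further down:

```python
import os

DECOMPRESS_EXTS = ['.gz', '.bz2']
DECOMPRESS_FILES = ['release', 'sources', 'packages']

def needs_decompress(filename):
    """True if this is a compressed index file to also store unpacked."""
    root, ext = os.path.splitext(filename)
    return root.lower() in DECOMPRESS_FILES and ext.lower() in DECOMPRESS_EXTS

assert needs_decompress('Packages.bz2')
assert needs_decompress('Sources.gz')
assert not needs_decompress('dpkg_1.14.16_i386.deb')
```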
- - @type stream: L{twisted.web2.stream.IByteStream} - @ivar stream: the input stream being read - @type outFile: L{twisted.python.filepath.FilePath} - @ivar outFile: the file being written - @type hash: L{Hash.HashObject} - @ivar hash: the hash object for the file - @type gzfile: C{file} - @ivar gzfile: the open file to write decompressed gzip data to - @type gzdec: L{zlib.decompressobj} - @ivar gzdec: the decompressor to use for the compressed gzip data - @type gzheader: C{boolean} - @ivar gzheader: whether the gzip header still needs to be removed from - the zlib compressed data - @type bz2file: C{file} - @ivar bz2file: the open file to write decompressed bz2 data to - @type bz2dec: L{bz2.BZ2Decompressor} - @ivar bz2dec: the decompressor to use for the compressed bz2 data - @type length: C{int} - @ivar length: the length of the original (compressed) file - @type doneDefer: L{twisted.internet.defer.Deferred} - @ivar doneDefer: the deferred that will fire when done streaming - - @group Stream implementation: read, close - - """ - - def __init__(self, stream, outFile, hash, decompress = None, decFile = None): - """Initializes the proxy. - - @type stream: L{twisted.web2.stream.IByteStream} - @param stream: the input stream to read from - @type outFile: L{twisted.python.filepath.FilePath} - @param outFile: the file to write to - @type hash: L{Hash.HashObject} - @param hash: the hash object to use for the file - @type decompress: C{string} - @param decompress: also decompress the file as this type - (currently only '.gz' and '.bz2' are supported) - @type decFile: C{twisted.python.FilePath} - @param decFile: the file to write the decompressed data to - """ - self.stream = stream - self.outFile = outFile.open('w') - self.hash = hash - self.hash.new() - self.gzfile = None - self.bz2file = None - if decompress == ".gz": - self.gzheader = True - self.gzfile = decFile.open('w') - self.gzdec = decompressobj(-MAX_WBITS) - elif decompress == ".bz2": - self.bz2file = decFile.open('w') - self.bz2dec = BZ2Decompressor() - self.length = self.stream.length - self.doneDefer = defer.Deferred() - - def _done(self): - """Close all the output files, return the result.""" - if not self.outFile.closed: - self.outFile.close() - self.hash.digest() - if self.gzfile: - # Finish the decompression - data_dec = self.gzdec.flush() - self.gzfile.write(data_dec) - self.gzfile.close() - self.gzfile = None - if self.bz2file: - self.bz2file.close() - self.bz2file = None - - self.doneDefer.callback(self.hash) - - def read(self): - """Read some data from the stream.""" - if self.outFile.closed: - return None - - # Read data from the stream, deal with the possible deferred - data = self.stream.read() - if isinstance(data, defer.Deferred): - data.addCallbacks(self._write, self._done) - return data - - self._write(data) - return data - - def _write(self, data): - """Write the stream data to the file and return it for others to use. - - Also optionally decompresses it. 
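For gzip, ProxyFileStream cannot hand chunks to the gzip module directly, so it strips the gzip header itself and feeds the remainder to a raw-deflate decompressor created with -MAX_WBITS. A self-contained sketch of that trick, simplified to a fixed 10-byte header (valid only when no optional header fields are present; _remove_gzip_header() below handles the general case):

```python
import gzip, zlib

raw = b'Package: dpkg\n' * 100
blob = gzip.compress(raw)          # stand-in for a downloaded .gz index file

dec = zlib.decompressobj(-zlib.MAX_WBITS)   # raw deflate, no zlib header
payload = blob[10:]                # gzip.compress() emits a bare 10-byte header
out = b''
for i in range(0, len(payload), 256):       # feed in pieces, as a stream would
    out += dec.decompress(payload[i:i + 256])
out += dec.flush()
assert out == raw                  # trailing CRC/length ends up in dec.unused_data
```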
- """ - if data is None: - self._done() - return data - - # Write and hash the streamed data - self.outFile.write(data) - self.hash.update(data) - - if self.gzfile: - # Decompress the zlib portion of the file - if self.gzheader: - # Remove the gzip header junk - self.gzheader = False - new_data = self._remove_gzip_header(data) - dec_data = self.gzdec.decompress(new_data) - else: - dec_data = self.gzdec.decompress(data) - self.gzfile.write(dec_data) - if self.bz2file: - # Decompress the bz2 file - dec_data = self.bz2dec.decompress(data) - self.bz2file.write(dec_data) - - return data - - def _remove_gzip_header(self, data): - """Remove the gzip header from the zlib compressed data.""" - # Read, check & discard the header fields - if data[:2] != '\037\213': - raise IOError, 'Not a gzipped file' - if ord(data[2]) != 8: - raise IOError, 'Unknown compression method' - flag = ord(data[3]) - # modtime = self.fileobj.read(4) - # extraflag = self.fileobj.read(1) - # os = self.fileobj.read(1) - - skip = 10 - if flag & FEXTRA: - # Read & discard the extra field - xlen = ord(data[10]) - xlen = xlen + 256*ord(data[11]) - skip = skip + 2 + xlen - if flag & FNAME: - # Read and discard a null-terminated string containing the filename - while True: - if not data[skip] or data[skip] == '\000': - break - skip += 1 - skip += 1 - if flag & FCOMMENT: - # Read and discard a null-terminated string containing a comment - while True: - if not data[skip] or data[skip] == '\000': - break - skip += 1 - skip += 1 - if flag & FHCRC: - skip += 2 # Read & discard the 16-bit header CRC - - return data[skip:] - - def close(self): - """Clean everything up and return None to future reads.""" - self.length = 0 - self._done() - self.stream.close() - -class CacheManager: - """Manages all downloaded files and requests for cached objects. - - @type cache_dir: L{twisted.python.filepath.FilePath} - @ivar cache_dir: the directory to use for storing all files - @type other_dirs: C{list} of L{twisted.python.filepath.FilePath} - @ivar other_dirs: the other directories that have shared files in them - @type all_dirs: C{list} of L{twisted.python.filepath.FilePath} - @ivar all_dirs: all the directories that have cached files in them - @type db: L{db.DB} - @ivar db: the database to use for tracking files and hashes - @type manager: L{apt_p2p.AptP2P} - @ivar manager: the main program object to send requests to - @type scanning: C{list} of L{twisted.python.filepath.FilePath} - @ivar scanning: all the directories that are currectly being scanned or waiting to be scanned - """ - - def __init__(self, cache_dir, db, other_dirs = [], manager = None): - """Initialize the instance and remove any untracked files from the DB.. 
- - @type cache_dir: L{twisted.python.filepath.FilePath} - @param cache_dir: the directory to use for storing all files - @type db: L{db.DB} - @param db: the database to use for tracking files and hashes - @type other_dirs: C{list} of L{twisted.python.filepath.FilePath} - @param other_dirs: the other directories that have shared files in them - (optional, defaults to only using the cache directory) - @type manager: L{apt_p2p.AptP2P} - @param manager: the main program object to send requests to - (optional, defaults to not calling back with cached files) - """ - self.cache_dir = cache_dir - self.other_dirs = other_dirs - self.all_dirs = self.other_dirs[:] - self.all_dirs.insert(0, self.cache_dir) - self.db = db - self.manager = manager - self.scanning = [] - - # Init the database, remove old files - self.db.removeUntrackedFiles(self.all_dirs) - - #{ Scanning directories - def scanDirectories(self): - """Scan the cache directories, hashing new and rehashing changed files.""" - assert not self.scanning, "a directory scan is already under way" - self.scanning = self.all_dirs[:] - self._scanDirectories() - - def _scanDirectories(self, result = None, walker = None): - """Walk each directory looking for cached files. - - @param result: the result of a DHT store request, not used (optional) - @param walker: the walker to use to traverse the current directory - (optional, defaults to creating a new walker from the first - directory in the L{CacheManager.scanning} list) - """ - # Need to start walking a new directory - if walker is None: - # If there are any left, get them - if self.scanning: - log.msg('started scanning directory: %s' % self.scanning[0].path) - walker = self.scanning[0].walk() - else: - log.msg('cache directory scan complete') - return - - try: - # Get the next file in the directory - file = walker.next() - except StopIteration: - # No files left, go to the next directory - log.msg('done scanning directory: %s' % self.scanning[0].path) - self.scanning.pop(0) - reactor.callLater(0, self._scanDirectories) - return - - # If it's not a file ignore it - if not file.isfile(): - log.msg('entering directory: %s' % file.path) - reactor.callLater(0, self._scanDirectories, None, walker) - return - - # If it's already properly in the DB, ignore it - db_status = self.db.isUnchanged(file) - if db_status: - log.msg('file is unchanged: %s' % file.path) - reactor.callLater(0, self._scanDirectories, None, walker) - return - - # Don't hash files in the cache that are not in the DB - if self.scanning[0] == self.cache_dir: - if db_status is None: - log.msg('ignoring unknown cache file: %s' % file.path) - else: - log.msg('removing changed cache file: %s' % file.path) - file.remove() - reactor.callLater(0, self._scanDirectories, None, walker) - return - - # Otherwise hash it - log.msg('start hash checking file: %s' % file.path) - hash = HashObject() - df = hash.hashInThread(file) - df.addBoth(self._doneHashing, file, walker) - df.addErrback(log.err) - - def _doneHashing(self, result, file, walker): - """If successful, add the hashed file to the DB and inform the main program.""" - if isinstance(result, HashObject): - log.msg('hash check of %s completed with hash: %s' % (file.path, result.hexdigest())) - - # Only set a URL if this is a downloaded file - url = None - if self.scanning[0] == self.cache_dir: - url = 'http:/' + file.path[len(self.cache_dir.path):] - - # Store the hashed file in the database - new_hash = self.db.storeFile(file, result.digest()) - - # Tell the main program to handle the new cache 
file - df = self.manager.new_cached_file(file, result, new_hash, url, True) - if df is None: - reactor.callLater(0, self._scanDirectories, None, walker) - else: - df.addBoth(self._scanDirectories, walker) - else: - # Must have returned an error - log.msg('hash check of %s failed' % file.path) - log.err(result) - reactor.callLater(0, self._scanDirectories, None, walker) - - #{ Downloading files - def save_file(self, response, hash, url): - """Save a downloaded file to the cache and stream it. - - @type response: L{twisted.web2.http.Response} - @param response: the response from the download - @type hash: L{Hash.HashObject} - @param hash: the hash object containing the expected hash for the file - @param url: the URI of the actual mirror request - @rtype: L{twisted.web2.http.Response} - @return: the final response from the download - """ - if response.code != 200: - log.msg('File was not found (%r): %s' % (response, url)) - return response - - log.msg('Returning file: %s' % url) - - # Set the destination path for the file - parsed = urlparse(url) - destFile = self.cache_dir.preauthChild(parsed[1] + parsed[2]) - log.msg('Saving returned %r byte file to cache: %s' % (response.stream.length, destFile.path)) - - # Make sure there's a free place for the file - if destFile.exists(): - log.msg('File already exists, removing: %s' % destFile.path) - destFile.remove() - elif not destFile.parent().exists(): - destFile.parent().makedirs() - - # Determine whether it needs to be decompressed and how - root, ext = os.path.splitext(destFile.basename()) - if root.lower() in DECOMPRESS_FILES and ext.lower() in DECOMPRESS_EXTS: - ext = ext.lower() - decFile = destFile.sibling(root) - log.msg('Decompressing to: %s' % decFile.path) - if decFile.exists(): - log.msg('File already exists, removing: %s' % decFile.path) - decFile.remove() - else: - ext = None - decFile = None - - # Create the new stream from the old one. - orig_stream = response.stream - response.stream = ProxyFileStream(orig_stream, destFile, hash, ext, decFile) - response.stream.doneDefer.addCallback(self._save_complete, url, destFile, - response.headers.getHeader('Last-Modified'), - decFile) - response.stream.doneDefer.addErrback(self.save_error, url) - - # Return the modified response with the new stream - return response - - def _save_complete(self, hash, url, destFile, modtime = None, decFile = None): - """Update the modification time and inform the main program. 
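save_file() above derives the on-disk cache location directly from the mirror URL: hostname plus path, rooted under the cache directory. For example (URL and cache directory hypothetical):

```python
from urlparse import urlparse   # Python 2, as used here; urllib.parse on Python 3

url = 'http://ftp.us.debian.org/debian/dists/stable/Release'
parsed = urlparse(url)
dest = '/var/cache/apt-p2p/' + parsed[1] + parsed[2]   # netloc + path
print(dest)   # /var/cache/apt-p2p/ftp.us.debian.org/debian/dists/stable/Release
```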
-        
-        @type hash: L{Hash.HashObject}
-        @param hash: the hash object containing the expected hash for the file
-        @param url: the URI of the actual mirror request
-        @type destFile: C{twisted.python.FilePath}
-        @param destFile: the file where the download was written to
-        @type modtime: C{int}
-        @param modtime: the modified time of the cached file (seconds since epoch)
-            (optional, defaults to not setting the modification time of the file)
-        @type decFile: C{twisted.python.FilePath}
-        @param decFile: the file where the decompressed download was written to
-            (optional, defaults to the file not having been compressed)
-        """
-        if modtime:
-            os.utime(destFile.path, (modtime, modtime))
-            if decFile:
-                os.utime(decFile.path, (modtime, modtime))
-        
-        result = hash.verify()
-        if result or result is None:
-            if result:
-                log.msg('Hashes match: %s' % url)
-            else:
-                log.msg('Hashed file to %s: %s' % (hash.hexdigest(), url))
-                
-            new_hash = self.db.storeFile(destFile, hash.digest())
-            log.msg('now available: %s' % (url))
-            
-            if self.manager:
-                self.manager.new_cached_file(destFile, hash, new_hash, url)
-                if decFile:
-                    ext_len = len(destFile.path) - len(decFile.path)
-                    self.manager.new_cached_file(decFile, None, False, url[:-ext_len])
-        else:
-            log.msg("Hashes don't match %s != %s: %s" % (hash.hexexpected(), hash.hexdigest(), url))
-            destFile.remove()
-            if decFile:
-                decFile.remove()
-
-    def save_error(self, failure, url):
-        """An error has occurred in downloading or saving the file."""
-        log.msg('Error occurred downloading %s' % url)
-        log.err(failure)
-        return failure
-
-class TestMirrorManager(unittest.TestCase):
-    """Unit tests for the mirror manager."""
-    
-    timeout = 20
-    pending_calls = []
-    client = None
-    
-    def setUp(self):
-        self.client = CacheManager(FilePath('/tmp/.apt-p2p'))
-        
-    def tearDown(self):
-        for p in self.pending_calls:
-            if p.active():
-                p.cancel()
-        self.client = None
-        
\ No newline at end of file
diff --git a/apt_dht/HTTPDownloader.py b/apt_dht/HTTPDownloader.py
deleted file mode 100644
index eb36932..0000000
--- a/apt_dht/HTTPDownloader.py
+++ /dev/null
@@ -1,423 +0,0 @@
-
-"""Manage all download requests to a single site."""
-
-from math import exp
-from datetime import datetime, timedelta
-
-from twisted.internet import reactor, defer, protocol
-from twisted.internet.protocol import ClientFactory
-from twisted import version as twisted_version
-from twisted.python import log
-from twisted.web2.client.interfaces import IHTTPClientManager
-from twisted.web2.client.http import ProtocolError, ClientRequest, HTTPClientProtocol
-from twisted.web2 import stream as stream_mod, http_headers
-from twisted.web2 import version as web2_version
-from twisted.trial import unittest
-from zope.interface import implements
-
-from apt_p2p_conf import version
-
-class Peer(ClientFactory):
-    """A manager for all HTTP requests to a single peer.
-    
-    Controls all requests that go to a single peer (host and port).
-    This includes buffering requests until they can be sent and reconnecting
-    in the event of the connection being closed.
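The "result or result is None" test in _save_complete() above reflects a three-way contract for HashObject.verify(): True (hashes match), None (no expected hash was known), False (mismatch, so the file is discarded). A self-contained mock of that contract (MiniHash is invented here; the real class is in Hash.py, not shown in this diff):

```python
import hashlib

class MiniHash(object):
    def __init__(self, expected=None):
        self.expected = expected          # hex digest we expect, if any
        self._h = hashlib.sha1()
    def update(self, data):
        self._h.update(data)
    def verify(self):
        if self.expected is None:
            return None                   # nothing to check against
        return self._h.hexdigest() == self.expected

h = MiniHash(expected=hashlib.sha1(b'data').hexdigest())
h.update(b'data')
assert h.verify() is True
assert MiniHash().verify() is None       # unknown hash: treated as acceptable
```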
- - """ - - implements(IHTTPClientManager) - - def __init__(self, host, port=80): - self.host = host - self.port = port - self.busy = False - self.pipeline = False - self.closed = True - self.connecting = False - self.request_queue = [] - self.response_queue = [] - self.proto = None - self.connector = None - self._errors = 0 - self._completed = 0 - self._downloadSpeeds = [] - self._lastResponse = None - self._responseTimes = [] - - #{ Manage the request queue - def connect(self): - """Connect to the peer.""" - assert self.closed and not self.connecting - self.connecting = True - d = protocol.ClientCreator(reactor, HTTPClientProtocol, self).connectTCP(self.host, self.port) - d.addCallback(self.connected) - - def connected(self, proto): - """Begin processing the queued requests.""" - self.closed = False - self.connecting = False - self.proto = proto - self.processQueue() - - def close(self): - """Close the connection to the peer.""" - if not self.closed: - self.proto.transport.loseConnection() - - def submitRequest(self, request): - """Add a new request to the queue. - - @type request: L{twisted.web2.client.http.ClientRequest} - @return: deferred that will fire with the completed request - """ - request.submissionTime = datetime.now() - request.deferRequest = defer.Deferred() - self.request_queue.append(request) - self.processQueue() - return request.deferRequest - - def processQueue(self): - """Check the queue to see if new requests can be sent to the peer.""" - if not self.request_queue: - return - if self.connecting: - return - if self.closed: - self.connect() - return - if self.busy and not self.pipeline: - return - if self.response_queue and not self.pipeline: - return - - req = self.request_queue.pop(0) - self.response_queue.append(req) - req.deferResponse = self.proto.submitRequest(req, False) - req.deferResponse.addCallbacks(self.requestComplete, self.requestError) - - def requestComplete(self, resp): - """Process a completed request.""" - self._processLastResponse() - req = self.response_queue.pop(0) - log.msg('%s of %s completed with code %d' % (req.method, req.uri, resp.code)) - self._completed += 1 - if resp.code >= 400: - self._errors += 1 - now = datetime.now() - self._responseTimes.append((now, now - req.submissionTime)) - self._lastResponse = (now, resp.stream.length) - req.deferRequest.callback(resp) - - def requestError(self, error): - """Process a request that ended with an error.""" - self._processLastResponse() - req = self.response_queue.pop(0) - log.msg('Download of %s generated error %r' % (req.uri, error)) - self._completed += 1 - self._errors += 1 - req.deferRequest.errback(error) - - def hashError(self, error): - """Log that a hash error occurred from the peer.""" - log.msg('Hash error from peer (%s, %d): %r' % (self.host, self.port, error)) - self._errors += 1 - - #{ IHTTPClientManager interface - def clientBusy(self, proto): - """Save the busy state.""" - self.busy = True - - def clientIdle(self, proto): - """Try to send a new request.""" - self._processLastResponse() - self.busy = False - self.processQueue() - - def clientPipelining(self, proto): - """Try to send a new request.""" - self.pipeline = True - self.processQueue() - - def clientGone(self, proto): - """Mark sent requests as errors.""" - self._processLastResponse() - for req in self.response_queue: - req.deferRequest.errback(ProtocolError('lost connection')) - self.busy = False - self.pipeline = False - self.closed = True - self.connecting = False - self.response_queue = [] - self.proto = None - if 
self.request_queue: - self.processQueue() - - #{ Downloading request interface - def setCommonHeaders(self): - """Get the common HTTP headers for all requests.""" - headers = http_headers.Headers() - headers.setHeader('Host', self.host) - headers.setHeader('User-Agent', 'apt-p2p/%s (twisted/%s twisted.web2/%s)' % - (version.short(), twisted_version.short(), web2_version.short())) - return headers - - def get(self, path, method="GET", modtime=None): - """Add a new request to the queue. - - @type path: C{string} - @param path: the path to request from the peer - @type method: C{string} - @param method: the HTTP method to use, 'GET' or 'HEAD' - (optional, defaults to 'GET') - @type modtime: C{int} - @param modtime: the modification time to use for an 'If-Modified-Since' - header, as seconds since the epoch - (optional, defaults to not sending that header) - """ - headers = self.setCommonHeaders() - if modtime: - headers.setHeader('If-Modified-Since', modtime) - return self.submitRequest(ClientRequest(method, path, headers, None)) - - def getRange(self, path, rangeStart, rangeEnd, method="GET"): - """Add a new request with a Range header to the queue. - - @type path: C{string} - @param path: the path to request from the peer - @type rangeStart: C{int} - @param rangeStart: the byte to begin the request at - @type rangeEnd: C{int} - @param rangeEnd: the byte to end the request at (inclusive) - @type method: C{string} - @param method: the HTTP method to use, 'GET' or 'HEAD' - (optional, defaults to 'GET') - """ - headers = self.setCommonHeaders() - headers.setHeader('Range', ('bytes', [(rangeStart, rangeEnd)])) - return self.submitRequest(ClientRequest(method, path, headers, None)) - - #{ Peer information - def isIdle(self): - """Check whether the peer is idle or not.""" - return not self.busy and not self.request_queue and not self.response_queue - - def _processLastResponse(self): - """Save the download time of the last request for speed calculations.""" - if self._lastResponse is not None: - now = datetime.now() - self._downloadSpeeds.append((now, now - self._lastResponse[0], self._lastResponse[1])) - self._lastResponse = None - - def downloadSpeed(self): - """Gets the latest average download speed for the peer. - - The average is over the last 10 responses that occurred in the last hour. - """ - total_time = 0.0 - total_download = 0 - now = datetime.now() - while self._downloadSpeeds and (len(self._downloadSpeeds) > 10 or - now - self._downloadSpeeds[0][0] > timedelta(seconds=3600)): - self._downloadSpeeds.pop(0) - - # If there are none, then you get 0 - if not self._downloadSpeeds: - return 0.0 - - for download in self._downloadSpeeds: - total_time += download[1].days*86400.0 + download[1].seconds + download[1].microseconds/1000000.0 - total_download += download[2] - - return total_download / total_time - - def responseTime(self): - """Gets the latest average response time for the peer. - - Response time is the time from receiving the request, to the time - the download begins. The average is over the last 10 responses that - occurred in the last hour. 
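downloadSpeed() and responseTime() above share the same bookkeeping: drop samples once there are more than ten of them or they are older than an hour, then average what remains. Factored into a generic helper, the idea is (a sketch, not the module's actual structure):

```python
from datetime import datetime, timedelta

def rolling_average(samples, keep=10, window=timedelta(hours=1), now=None):
    """samples: list of (timestamp, value) pairs, oldest first; pruned in place."""
    now = now or datetime.now()
    while samples and (len(samples) > keep or now - samples[0][0] > window):
        samples.pop(0)
    if not samples:
        return 0.0
    return sum(value for _, value in samples) / len(samples)

t = datetime.now()
speeds = [(t - timedelta(minutes=m), 50.0 * 1024) for m in (90, 5, 1)]
print(rolling_average(speeds))   # the 90-minute-old sample is discarded
```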
- """ - total_response = 0.0 - now = datetime.now() - while self._responseTimes and (len(self._responseTimes) > 10 or - now - self._responseTimes[0][0] > timedelta(seconds=3600)): - self._responseTimes.pop(0) - - # If there are none, give it the benefit of the doubt - if not self._responseTimes: - return 0.0 - - for response in self._responseTimes: - total_response += response[1].days*86400.0 + response[1].seconds + response[1].microseconds/1000000.0 - - return total_response / len(self._responseTimes) - - def rank(self, fastest): - """Determine the ranking value for the peer. - - The ranking value is composed of 5 numbers: - - 1 if a connection to the peer is open, 0.9 otherwise - - 1 if there are no pending requests, to 0 if there are a maximum - - 1 if the peer is the fastest of all peers, to 0 if the speed is 0 - - 1 if all requests are good, 0 if all produced errors - - an exponentially decreasing number based on the response time - """ - rank = 1.0 - if self.closed: - rank *= 0.9 - rank *= (max(0.0, 10.0 - len(self.request_queue) - len(self.response_queue))) / 10.0 - if fastest > 0.0: - rank *= min(1.0, self.downloadSpeed() / fastest) - if self._completed: - rank *= max(0.0, 1.0 - float(self._errors) / self._completed) - rank *= exp(-self.responseTime() / 5.0) - return rank - -class TestClientManager(unittest.TestCase): - """Unit tests for the Peer.""" - - client = None - pending_calls = [] - - def gotResp(self, resp, num, expect): - self.failUnless(resp.code >= 200 and resp.code < 300, "Got a non-200 response: %r" % resp.code) - if expect is not None: - self.failUnless(resp.stream.length == expect, "Length was incorrect, got %r, expected %r" % (resp.stream.length, expect)) - def print_(n): - pass - def printdone(n): - pass - stream_mod.readStream(resp.stream, print_).addCallback(printdone) - - def test_download(self): - """Tests a normal download.""" - host = 'www.ietf.org' - self.client = Peer(host, 80) - self.timeout = 10 - - d = self.client.get('/rfc/rfc0013.txt') - d.addCallback(self.gotResp, 1, 1070) - return d - - def test_head(self): - """Tests a 'HEAD' request.""" - host = 'www.ietf.org' - self.client = Peer(host, 80) - self.timeout = 10 - - d = self.client.get('/rfc/rfc0013.txt', "HEAD") - d.addCallback(self.gotResp, 1, 0) - return d - - def test_multiple_downloads(self): - """Tests multiple downloads with queueing and connection closing.""" - host = 'www.ietf.org' - self.client = Peer(host, 80) - self.timeout = 120 - lastDefer = defer.Deferred() - - def newRequest(path, num, expect, last=False): - d = self.client.get(path) - d.addCallback(self.gotResp, num, expect) - if last: - d.addBoth(lastDefer.callback) - - # 3 quick requests - newRequest("/rfc/rfc0006.txt", 1, 1776) - newRequest("/rfc/rfc2362.txt", 2, 159833) - newRequest("/rfc/rfc0801.txt", 3, 40824) - - # This one will probably be queued - self.pending_calls.append(reactor.callLater(1, newRequest, '/rfc/rfc0013.txt', 4, 1070)) - - # Connection should still be open, but idle - self.pending_calls.append(reactor.callLater(10, newRequest, '/rfc/rfc0022.txt', 5, 4606)) - - #Connection should be closed - self.pending_calls.append(reactor.callLater(30, newRequest, '/rfc/rfc0048.txt', 6, 41696)) - self.pending_calls.append(reactor.callLater(31, newRequest, '/rfc/rfc3261.txt', 7, 647976)) - self.pending_calls.append(reactor.callLater(32, newRequest, '/rfc/rfc0014.txt', 8, 27)) - self.pending_calls.append(reactor.callLater(32, newRequest, '/rfc/rfc0001.txt', 9, 21088)) - - # Now it should definitely be closed - 
self.pending_calls.append(reactor.callLater(62, newRequest, '/rfc/rfc2801.txt', 0, 598794, True)) - return lastDefer - - def test_multiple_quick_downloads(self): - """Tests lots of multiple downloads with queueing.""" - host = 'www.ietf.org' - self.client = Peer(host, 80) - self.timeout = 30 - lastDefer = defer.Deferred() - - def newRequest(path, num, expect, last=False): - d = self.client.get(path) - d.addCallback(self.gotResp, num, expect) - if last: - d.addBoth(lastDefer.callback) - - newRequest("/rfc/rfc0006.txt", 1, 1776) - newRequest("/rfc/rfc2362.txt", 2, 159833) - newRequest("/rfc/rfc0801.txt", 3, 40824) - self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0013.txt', 4, 1070)) - self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0022.txt', 5, 4606)) - self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0048.txt', 6, 41696)) - self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc3261.txt', 7, 647976)) - self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0014.txt', 8, 27)) - self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0001.txt', 9, 21088)) - self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc2801.txt', 0, 598794, True)) - return lastDefer - - def checkInfo(self): - log.msg('Rank is: %r' % self.client.rank(250.0*1024)) - log.msg('Download speed is: %r' % self.client.downloadSpeed()) - log.msg('Response Time is: %r' % self.client.responseTime()) - - def test_peer_info(self): - """Test retrieving the peer info during a download.""" - host = 'www.ietf.org' - self.client = Peer(host, 80) - self.timeout = 120 - lastDefer = defer.Deferred() - - def newRequest(path, num, expect, last=False): - d = self.client.get(path) - d.addCallback(self.gotResp, num, expect) - if last: - d.addBoth(lastDefer.callback) - - newRequest("/rfc/rfc0006.txt", 1, 1776) - newRequest("/rfc/rfc2362.txt", 2, 159833) - newRequest("/rfc/rfc0801.txt", 3, 40824) - self.pending_calls.append(reactor.callLater(1, newRequest, '/rfc/rfc0013.txt', 4, 1070)) - self.pending_calls.append(reactor.callLater(10, newRequest, '/rfc/rfc0022.txt', 5, 4606)) - self.pending_calls.append(reactor.callLater(30, newRequest, '/rfc/rfc0048.txt', 6, 41696)) - self.pending_calls.append(reactor.callLater(31, newRequest, '/rfc/rfc3261.txt', 7, 647976)) - self.pending_calls.append(reactor.callLater(32, newRequest, '/rfc/rfc0014.txt', 8, 27)) - self.pending_calls.append(reactor.callLater(32, newRequest, '/rfc/rfc0001.txt', 9, 21088)) - self.pending_calls.append(reactor.callLater(62, newRequest, '/rfc/rfc2801.txt', 0, 598794, True)) - - for i in xrange(2, 122, 2): - self.pending_calls.append(reactor.callLater(i, self.checkInfo)) - - return lastDefer - - def test_range(self): - """Test a Range request.""" - host = 'www.ietf.org' - self.client = Peer(host, 80) - self.timeout = 10 - - d = self.client.getRange('/rfc/rfc0013.txt', 100, 199) - d.addCallback(self.gotResp, 1, 100) - return d - - def tearDown(self): - for p in self.pending_calls: - if p.active(): - p.cancel() - self.pending_calls = [] - if self.client: - self.client.close() - self.client = None diff --git a/apt_dht/HTTPServer.py b/apt_dht/HTTPServer.py deleted file mode 100644 index d252a63..0000000 --- a/apt_dht/HTTPServer.py +++ /dev/null @@ -1,242 +0,0 @@ - -"""Serve local requests from apt and remote requests from peers.""" - -from urllib import unquote_plus -from binascii import b2a_hex - -from twisted.python import log -from twisted.internet import defer -from 
twisted.web2 import server, http, resource, channel, stream -from twisted.web2 import static, http_headers, responsecode - -from policies import ThrottlingFactory -from apt_p2p_Khashmir.bencode import bencode - -class FileDownloader(static.File): - """Modified to make it suitable for apt requests. - - Tries to find requests in the cache. Found files are first checked for - freshness before being sent. Requests for unfound and stale files are - forwarded to the main program for downloading. - - @type manager: L{apt_p2p.AptP2P} - @ivar manager: the main program to query - """ - - def __init__(self, path, manager, defaultType="text/plain", ignoredExts=(), processors=None, indexNames=None): - self.manager = manager - super(FileDownloader, self).__init__(path, defaultType, ignoredExts, processors, indexNames) - - def renderHTTP(self, req): - log.msg('Got request for %s from %s' % (req.uri, req.remoteAddr)) - resp = super(FileDownloader, self).renderHTTP(req) - if isinstance(resp, defer.Deferred): - resp.addCallback(self._renderHTTP_done, req) - else: - resp = self._renderHTTP_done(resp, req) - return resp - - def _renderHTTP_done(self, resp, req): - log.msg('Initial response to %s: %r' % (req.uri, resp)) - - if self.manager: - path = 'http:/' + req.uri - if resp.code >= 200 and resp.code < 400: - return self.manager.check_freshness(req, path, resp.headers.getHeader('Last-Modified'), resp) - - log.msg('Not found, trying other methods for %s' % req.uri) - return self.manager.get_resp(req, path) - - return resp - - def createSimilarFile(self, path): - return self.__class__(path, self.manager, self.defaultType, self.ignoredExts, - self.processors, self.indexNames[:]) - -class FileUploaderStream(stream.FileStream): - """Modified to make it suitable for streaming to peers. - - Streams the file in small chunks to make it easier to throttle the - streaming to peers. - - @ivar CHUNK_SIZE: the size of chunks of data to send at a time - """ - - CHUNK_SIZE = 4*1024 - - def read(self, sendfile=False): - if self.f is None: - return None - - length = self.length - if length == 0: - self.f = None - return None - - # Remove the SendFileBuffer and mmap use, just use string reads and writes - - readSize = min(length, self.CHUNK_SIZE) - - self.f.seek(self.start) - b = self.f.read(readSize) - bytesRead = len(b) - if not bytesRead: - raise RuntimeError("Ran out of data reading file %r, expected %d more bytes" % (self.f, length)) - else: - self.length -= bytesRead - self.start += bytesRead - return b - - -class FileUploader(static.File): - """Modified to make it suitable for peer requests. - - Uses the modified L{FileUploaderStream} to stream the file for throttling, - and doesn't do any listing of directory contents. 
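    Serving in C{CHUNK_SIZE} pieces matters because the server factory is
    wrapped in a L{policies.ThrottlingFactory} (see L{TopLevel.getHTTPFactory}
    below): many small writes let the throttler enforce its write limit
    smoothly, where one large buffered write would bypass it. A minimal
    sketch of the chunked-read idea (hypothetical standalone code, not part
    of this class)::

        f = open('some.deb', 'rb')     # assumed example file
        chunk = f.read(4*1024)         # CHUNK_SIZE
        while chunk:
            consume(chunk)             # stand-in for the stream's consumer
            chunk = f.read(4*1024)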
- """ - - def render(self, req): - if not self.fp.exists(): - return responsecode.NOT_FOUND - - if self.fp.isdir(): - # Don't try to render a directory listing - return responsecode.NOT_FOUND - - try: - f = self.fp.open() - except IOError, e: - import errno - if e[0] == errno.EACCES: - return responsecode.FORBIDDEN - elif e[0] == errno.ENOENT: - return responsecode.NOT_FOUND - else: - raise - - response = http.Response() - # Use the modified FileStream - response.stream = FileUploaderStream(f, 0, self.fp.getsize()) - - for (header, value) in ( - ("content-type", self.contentType()), - ("content-encoding", self.contentEncoding()), - ): - if value is not None: - response.headers.setHeader(header, value) - - return response - -class TopLevel(resource.Resource): - """The HTTP server for all requests, both from peers and apt. - - @type directory: L{twisted.python.filepath.FilePath} - @ivar directory: the directory to check for cached files - @type db: L{db.DB} - @ivar db: the database to use for looking up files and hashes - @type manager: L{apt_p2p.AptP2P} - @ivar manager: the main program object to send requests to - @type factory: L{twisted.web2.channel.HTTPFactory} or L{policies.ThrottlingFactory} - @ivar factory: the factory to use to server HTTP requests - - """ - - addSlash = True - - def __init__(self, directory, db, manager): - """Initialize the instance. - - @type directory: L{twisted.python.filepath.FilePath} - @param directory: the directory to check for cached files - @type db: L{db.DB} - @param db: the database to use for looking up files and hashes - @type manager: L{apt_p2p.AptP2P} - @param manager: the main program object to send requests to - """ - self.directory = directory - self.db = db - self.manager = manager - self.factory = None - - def getHTTPFactory(self): - """Initialize and get the factory for this HTTP server.""" - if self.factory is None: - self.factory = channel.HTTPFactory(server.Site(self), - **{'maxPipeline': 10, - 'betweenRequestsTimeOut': 60}) - self.factory = ThrottlingFactory(self.factory, writeLimit = 30*1024) - return self.factory - - def render(self, ctx): - """Render a web page with descriptive statistics.""" - return http.Response( - 200, - {'content-type': http_headers.MimeType('text', 'html')}, - """ -

<h2>Statistics</h2>
<p>
TODO: eventually some stats will be shown here.""") - - def locateChild(self, request, segments): - """Process the incoming request.""" - log.msg('Got HTTP request for %s from %s' % (request.uri, request.remoteAddr)) - name = segments[0] - - # If the request is for a shared file (from a peer) - if name == '~': - if len(segments) != 2: - log.msg('Got a malformed request from %s' % request.remoteAddr) - return None, () - - # Find the file in the database - hash = unquote_plus(segments[1]) - files = self.db.lookupHash(hash) - if files: - # If it is a file, return it - if 'path' in files[0]: - log.msg('Sharing %s with %s' % (files[0]['path'].path, request.remoteAddr)) - return FileUploader(files[0]['path'].path), () - else: - # It's not for a file, but for a piece string, so return that - log.msg('Sending torrent string %s to %s' % (b2a_hex(hash), request.remoteAddr)) - return static.Data(bencode({'t': files[0]['pieces']}), 'application/x-bencoded'), () - else: - log.msg('Hash could not be found in database: %s' % hash) - - # Only local requests (apt) get past this point - if request.remoteAddr.host != "127.0.0.1": - log.msg('Blocked illegal access to %s from %s' % (request.uri, request.remoteAddr)) - return None, () - - if len(name) > 1: - # It's a request from apt - return FileDownloader(self.directory.path, self.manager), segments[0:] - else: - # Will render the statistics page - return self, () - - log.msg('Got a malformed request for "%s" from %s' % (request.uri, request.remoteAddr)) - return None, () - -if __name__ == '__builtin__': - # Running from twistd -ny HTTPServer.py - # Then test with: - # wget -S 'http://localhost:18080/~/whatever' - # wget -S 'http://localhost:18080/~/pieces' - - import os.path - from twisted.python.filepath import FilePath - - class DB: - def lookupHash(self, hash): - if hash == 'pieces': - return [{'pieces': 'abcdefghij0123456789\xca\xec\xb8\x0c\x00\xe7\x07\xf8~])\x8f\x9d\xe5_B\xff\x1a\xc4!'}] - return [{'path': FilePath(os.path.expanduser('~/school/optout'))}] - - t = TopLevel(FilePath(os.path.expanduser('~')), DB(), None) - factory = t.getHTTPFactory() - - # Standard twisted application Boilerplate - from twisted.application import service, strports - application = service.Application("demoserver") - s = strports.service('tcp:18080', factory) - s.setServiceParent(application) diff --git a/apt_dht/Hash.py b/apt_dht/Hash.py deleted file mode 100644 index 850f393..0000000 --- a/apt_dht/Hash.py +++ /dev/null @@ -1,342 +0,0 @@ - -"""Hash and store hash information for a file. - -@var PIECE_SIZE: the piece size to use for hashing pieces of files - -""" - -from binascii import b2a_hex, a2b_hex -import sys - -from twisted.internet import threads, defer -from twisted.trial import unittest - -PIECE_SIZE = 512*1024 - -class HashError(ValueError): - """An error has occurred while hashing a file.""" - -class HashObject: - """Manages hashes and hashing for a file. 
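    Data is hashed both as a whole and in L{PIECE_SIZE} (512 KiB) pieces.
    As an illustrative example (assumed file size, not from the original
    code): a 1200 KiB file yields one whole-file hash plus three piece
    hashes, for pieces of 512, 512 and 176 KiB.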
- - @ivar ORDER: the priority ordering of hashes, and how to extract them - - """ - - ORDER = [ {'name': 'sha1', - 'length': 20, - 'AptPkgRecord': 'SHA1Hash', - 'AptSrcRecord': False, - 'AptIndexRecord': 'SHA1', - 'old_module': 'sha', - 'hashlib_func': 'sha1', - }, - {'name': 'sha256', - 'length': 32, - 'AptPkgRecord': 'SHA256Hash', - 'AptSrcRecord': False, - 'AptIndexRecord': 'SHA256', - 'hashlib_func': 'sha256', - }, - {'name': 'md5', - 'length': 16, - 'AptPkgRecord': 'MD5Hash', - 'AptSrcRecord': True, - 'AptIndexRecord': 'MD5SUM', - 'old_module': 'md5', - 'hashlib_func': 'md5', - }, - ] - - def __init__(self, digest = None, size = None, pieces = ''): - """Initialize the hash object.""" - self.hashTypeNum = 0 # Use the first if nothing else matters - if sys.version_info < (2, 5): - # sha256 is not available in python before 2.5, remove it - for hashType in self.ORDER: - if hashType['name'] == 'sha256': - del self.ORDER[self.ORDER.index(hashType)] - break - - self.expHash = None - self.expHex = None - self.expSize = None - self.expNormHash = None - self.fileHasher = None - self.pieceHasher = None - self.fileHash = digest - self.pieceHash = [pieces[x:x+self.ORDER[self.hashTypeNum]['length']] - for x in xrange(0, len(pieces), self.ORDER[self.hashTypeNum]['length'])] - self.size = size - self.fileHex = None - self.fileNormHash = None - self.done = True - self.result = None - - #{ Hashing data - def new(self, force = False): - """Generate a new hashing object suitable for hashing a file. - - @param force: set to True to force creating a new object even if - the hash has been verified already - """ - if self.result is None or force: - self.result = None - self.done = False - self.fileHasher = self._new() - self.pieceHasher = None - self.fileHash = None - self.pieceHash = [] - self.size = 0 - self.fileHex = None - self.fileNormHash = None - - def _new(self): - """Create a new hashing object according to the hash type.""" - if sys.version_info < (2, 5): - mod = __import__(self.ORDER[self.hashTypeNum]['old_module'], globals(), locals(), []) - return mod.new() - else: - import hashlib - func = getattr(hashlib, self.ORDER[self.hashTypeNum]['hashlib_func']) - return func() - - def update(self, data): - """Add more data to the file hasher.""" - if self.result is None: - if self.done: - raise HashError, "Already done, you can't add more data after calling digest() or verify()" - if self.fileHasher is None: - raise HashError, "file hasher not initialized" - - if not self.pieceHasher and self.size + len(data) > PIECE_SIZE: - # Hash up to the piece size - self.fileHasher.update(data[:(PIECE_SIZE - self.size)]) - data = data[(PIECE_SIZE - self.size):] - self.size = PIECE_SIZE - - # Save the first piece digest and initialize a new piece hasher - self.pieceHash.append(self.fileHasher.digest()) - self.pieceHasher = self._new() - - if self.pieceHasher: - # Loop in case the data contains multiple pieces - piece_size = self.size % PIECE_SIZE - while piece_size + len(data) > PIECE_SIZE: - # Save the piece hash and start a new one - self.pieceHasher.update(data[:(PIECE_SIZE - piece_size)]) - self.pieceHash.append(self.pieceHasher.digest()) - self.pieceHasher = self._new() - - # Don't forget to hash the data normally - self.fileHasher.update(data[:(PIECE_SIZE - piece_size)]) - data = data[(PIECE_SIZE - piece_size):] - self.size += PIECE_SIZE - piece_size - piece_size = self.size % PIECE_SIZE - - # Hash any remaining data - self.pieceHasher.update(data) - - self.fileHasher.update(data) - self.size += len(data) - - 
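    # Illustrative aside (not part of the original class): the update()
    # logic above is subtle because incoming data can straddle piece
    # boundaries.  A minimal standalone sketch of per-piece hashing,
    # assuming Python 2.5+ hashlib and this module's 512 KiB PIECE_SIZE:
    #
    #     import hashlib
    #
    #     def piece_digests(f, piece_size=512*1024):
    #         """Return the SHA1 digest of each piece_size chunk of f."""
    #         digests = []
    #         data = f.read(piece_size)
    #         while data:
    #             digests.append(hashlib.sha1(data).digest())
    #             data = f.read(piece_size)
    #         return digests
    #
    # HashObject is more involved: it feeds a single whole-file hasher at
    # the same time as the per-piece hashers, and only starts recording
    # piece digests once the data grows past one piece.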
def hashInThread(self, file): - """Hashes a file in a separate thread, returning a deferred that will callback with the result.""" - file.restat(False) - if not file.exists(): - df = defer.Deferred() - df.errback(HashError("file not found")) - return df - - df = threads.deferToThread(self._hashInThread, file) - return df - - def _hashInThread(self, file): - """Hashes a file, returning itself as the result.""" - f = file.open() - self.new(force = True) - data = f.read(4096) - while data: - self.update(data) - data = f.read(4096) - self.digest() - return self - - #{ Checking hashes of data - def pieceDigests(self): - """Get the piece hashes of the added file data.""" - self.digest() - return self.pieceHash - - def digest(self): - """Get the hash of the added file data.""" - if self.fileHash is None: - if self.fileHasher is None: - raise HashError, "you must hash some data first" - self.fileHash = self.fileHasher.digest() - self.done = True - - # Save the last piece hash - if self.pieceHasher: - self.pieceHash.append(self.pieceHasher.digest()) - return self.fileHash - - def hexdigest(self): - """Get the hash of the added file data in hex format.""" - if self.fileHex is None: - self.fileHex = b2a_hex(self.digest()) - return self.fileHex - - def verify(self): - """Verify that the added file data hash matches the expected hash.""" - if self.result is None and self.fileHash is not None and self.expHash is not None: - self.result = (self.fileHash == self.expHash and self.size == self.expSize) - return self.result - - #{ Expected hash - def expected(self): - """Get the expected hash.""" - return self.expHash - - def hexexpected(self): - """Get the expected hash in hex format.""" - if self.expHex is None and self.expHash is not None: - self.expHex = b2a_hex(self.expHash) - return self.expHex - - #{ Setting the expected hash - def set(self, hashType, hashHex, size): - """Initialize the hash object. - - @param hashType: must be one of the dictionaries from L{ORDER} - """ - self.hashTypeNum = self.ORDER.index(hashType) # error if not found - self.expHex = hashHex - self.expSize = int(size) - self.expHash = a2b_hex(self.expHex) - - def setFromIndexRecord(self, record): - """Set the hash from the cache of index file records. - - @type record: C{dictionary} - @param record: keys are hash types, values are tuples of (hash, size) - """ - for hashType in self.ORDER: - result = record.get(hashType['AptIndexRecord'], None) - if result: - self.set(hashType, result[0], result[1]) - return True - return False - - def setFromPkgRecord(self, record, size): - """Set the hash from Apt's binary packages cache. - - @param record: whatever is returned by apt_pkg.GetPkgRecords() - """ - for hashType in self.ORDER: - hashHex = getattr(record, hashType['AptPkgRecord'], None) - if hashHex: - self.set(hashType, hashHex, size) - return True - return False - - def setFromSrcRecord(self, record): - """Set the hash from Apt's source package records cache. - - Currently very simple since Apt only tracks MD5 hashes of source files. 
- - @type record: (C{string}, C{int}, C{string}) - @param record: the hash, size and path of the source file - """ - for hashType in self.ORDER: - if hashType['AptSrcRecord']: - self.set(hashType, record[0], record[1]) - return True - return False - -class TestHashObject(unittest.TestCase): - """Unit tests for the hash objects.""" - - timeout = 5 - if sys.version_info < (2, 4): - skip = "skippingme" - - def test_failure(self): - """Tests that the hash object fails when treated badly.""" - h = HashObject() - h.set(h.ORDER[0], b2a_hex('12345678901234567890'), '0') - self.failUnlessRaises(HashError, h.digest) - self.failUnlessRaises(HashError, h.hexdigest) - self.failUnlessRaises(HashError, h.update, 'gfgf') - - def test_pieces(self): - """Tests the hashing of large files into pieces.""" - h = HashObject() - h.new() - h.update('1234567890'*120*1024) - self.failUnless(h.digest() == '1(j\xd2q\x0b\n\x91\xd2\x13\x90\x15\xa3E\xcc\xb0\x8d.\xc3\xc5') - pieces = h.pieceDigests() - self.failUnless(len(pieces) == 3) - self.failUnless(pieces[0] == ',G \xd8\xbbPl\xf1\xa3\xa0\x0cW\n\xe6\xe6a\xc9\x95/\xe5') - self.failUnless(pieces[1] == '\xf6V\xeb/\xa8\xad[\x07Z\xf9\x87\xa4\xf5w\xdf\xe1|\x00\x8e\x93') - self.failUnless(pieces[2] == 'M[\xbf\xee\xaa+\x19\xbaV\xf699\r\x17o\xcb\x8e\xcfP\x19') - h.new(True) - for i in xrange(120*1024): - h.update('1234567890') - pieces = h.pieceDigests() - self.failUnless(h.digest() == '1(j\xd2q\x0b\n\x91\xd2\x13\x90\x15\xa3E\xcc\xb0\x8d.\xc3\xc5') - self.failUnless(len(pieces) == 3) - self.failUnless(pieces[0] == ',G \xd8\xbbPl\xf1\xa3\xa0\x0cW\n\xe6\xe6a\xc9\x95/\xe5') - self.failUnless(pieces[1] == '\xf6V\xeb/\xa8\xad[\x07Z\xf9\x87\xa4\xf5w\xdf\xe1|\x00\x8e\x93') - self.failUnless(pieces[2] == 'M[\xbf\xee\xaa+\x19\xbaV\xf699\r\x17o\xcb\x8e\xcfP\x19') - - def test_sha1(self): - """Test hashing using the SHA1 hash.""" - h = HashObject() - found = False - for hashType in h.ORDER: - if hashType['name'] == 'sha1': - found = True - break - self.failUnless(found == True) - h.set(hashType, '3bba0a5d97b7946ad2632002bf9caefe2cb18e00', '19') - h.new() - h.update('apt-p2p is the best') - self.failUnless(h.hexdigest() == '3bba0a5d97b7946ad2632002bf9caefe2cb18e00') - self.failUnlessRaises(HashError, h.update, 'gfgf') - self.failUnless(h.verify() == True) - - def test_md5(self): - """Test hashing using the MD5 hash.""" - h = HashObject() - found = False - for hashType in h.ORDER: - if hashType['name'] == 'md5': - found = True - break - self.failUnless(found == True) - h.set(hashType, '6b5abdd30d7ed80edd229f9071d8c23c', '19') - h.new() - h.update('apt-p2p is the best') - self.failUnless(h.hexdigest() == '6b5abdd30d7ed80edd229f9071d8c23c') - self.failUnlessRaises(HashError, h.update, 'gfgf') - self.failUnless(h.verify() == True) - - def test_sha256(self): - """Test hashing using the SHA256 hash.""" - h = HashObject() - found = False - for hashType in h.ORDER: - if hashType['name'] == 'sha256': - found = True - break - self.failUnless(found == True) - h.set(hashType, '47f2238a30a0340faa2bf01a9bdc42ba77b07b411cda1e24cd8d7b5c4b7d82a7', '19') - h.new() - h.update('apt-p2p is the best') - self.failUnless(h.hexdigest() == '47f2238a30a0340faa2bf01a9bdc42ba77b07b411cda1e24cd8d7b5c4b7d82a7') - self.failUnlessRaises(HashError, h.update, 'gfgf') - self.failUnless(h.verify() == True) - - if sys.version_info < (2, 5): - test_sha256.skip = "SHA256 hashes are not supported by Python until version 2.5" diff --git a/apt_dht/MirrorManager.py b/apt_dht/MirrorManager.py deleted file mode 100644 index 
4c19f10..0000000 --- a/apt_dht/MirrorManager.py +++ /dev/null @@ -1,245 +0,0 @@ - -"""Manage the multiple mirrors that may be requested. - -@var aptpkg_dir: the name of the directory to use for mirror files -""" - -from urlparse import urlparse -import os - -from twisted.python import log -from twisted.python.filepath import FilePath -from twisted.internet import defer -from twisted.trial import unittest -from twisted.web2.http import splitHostPort - -from AptPackages import AptPackages - -aptpkg_dir='apt-packages' - -class MirrorError(Exception): - """Exception raised when there's a problem with the mirror.""" - -class MirrorManager: - """Manages all requests for mirror information. - - @type cache_dir: L{twisted.python.filepath.FilePath} - @ivar cache_dir: the directory to use for storing all files - @type unload_delay: C{int} - @ivar unload_delay: the time to wait before unloading the apt cache - @type apt_caches: C{dictionary} - @ivar apt_caches: the available mirrors - """ - - def __init__(self, cache_dir, unload_delay): - self.cache_dir = cache_dir - self.unload_delay = unload_delay - self.apt_caches = {} - - def extractPath(self, url): - """Break the full URI down into the site, base directory and path. - - Site is the host and port of the mirror. Base directory is the - directory to the mirror location (usually just '/debian'). Path is - the remaining path to get to the file. - - E.g. http://ftp.debian.org/debian/dists/sid/binary-i386/Packages.bz2 - would return ('ftp.debian.org:80', '/debian', - '/dists/sid/binary-i386/Packages.bz2'). - - @param url: the URI of the file's location on the mirror - @rtype: (C{string}, C{string}, C{string}) - @return: the site, base directory and path to the file - """ - # Extract the host and port - parsed = urlparse(url) - host, port = splitHostPort(parsed[0], parsed[1]) - site = host + ":" + str(port) - path = parsed[2] - - # Try to find the base directory (most can be found this way) - i = max(path.rfind('/dists/'), path.rfind('/pool/')) - if i >= 0: - baseDir = path[:i] - path = path[i:] - else: - # Uh oh, this is not good - log.msg("Couldn't find a good base directory for path: %s" % (site + path)) - - # Try to find an existing cache that starts with this one - # (fallback to using an empty base directory) - baseDir = '' - if site in self.apt_caches: - longest_match = 0 - for base in self.apt_caches[site]: - base_match = '' - for dirs in path.split('/'): - if base.startswith(base_match + '/' + dirs): - base_match += '/' + dirs - else: - break - if len(base_match) > longest_match: - longest_match = len(base_match) - baseDir = base_match - log.msg("Settled on baseDir: %s" % baseDir) - - return site, baseDir, path - - def init(self, site, baseDir): - """Make sure an L{AptPackages} exists for this mirror.""" - if site not in self.apt_caches: - self.apt_caches[site] = {} - - if baseDir not in self.apt_caches[site]: - site_cache = self.cache_dir.child(aptpkg_dir).child('mirrors').child(site + baseDir.replace('/', '_')) - site_cache.makedirs() - self.apt_caches[site][baseDir] = AptPackages(site_cache, self.unload_delay) - - def updatedFile(self, url, file_path): - """A file in the mirror has changed or been added. - - @see: L{AptPackages.PackageFileList.update_file} - """ - site, baseDir, path = self.extractPath(url) - self.init(site, baseDir) - self.apt_caches[site][baseDir].file_updated(path, file_path) - - def findHash(self, url): - """Find the hash for a given url. 
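        A typical lookup (illustrative names only)::

            d = mirrors.findHash('http://ftp.us.debian.org/debian/dists/sid/Release')
            d.addCallback(got_hash)    # got_hash receives a L{Hash.HashObject}

        where C{mirrors} is a L{MirrorManager} instance and C{got_hash} is a
        hypothetical callback.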
- - @param url: the URI of the file's location on the mirror - @rtype: L{twisted.internet.defer.Deferred} - @return: a deferred that will fire with the returned L{Hash.HashObject} - """ - site, baseDir, path = self.extractPath(url) - if site in self.apt_caches and baseDir in self.apt_caches[site]: - return self.apt_caches[site][baseDir].findHash(path) - d = defer.Deferred() - d.errback(MirrorError("Site Not Found")) - return d - - def cleanup(self): - for site in self.apt_caches.keys(): - for baseDir in self.apt_caches[site].keys(): - self.apt_caches[site][baseDir].cleanup() - del self.apt_caches[site][baseDir] - del self.apt_caches[site] - -class TestMirrorManager(unittest.TestCase): - """Unit tests for the mirror manager.""" - - timeout = 20 - pending_calls = [] - client = None - - def setUp(self): - self.client = MirrorManager(FilePath('/tmp/.apt-p2p'), 300) - - def test_extractPath(self): - """Test extracting the site and base directory from various mirrors.""" - site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org/debian/dists/unstable/Release') - self.failUnless(site == "ftp.us.debian.org:80", "no match: %s" % site) - self.failUnless(baseDir == "/debian", "no match: %s" % baseDir) - self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path) - - site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org:16999/debian/pool/d/dpkg/dpkg_1.2.1-1.tar.gz') - self.failUnless(site == "ftp.us.debian.org:16999", "no match: %s" % site) - self.failUnless(baseDir == "/debian", "no match: %s" % baseDir) - self.failUnless(path == "/pool/d/dpkg/dpkg_1.2.1-1.tar.gz", "no match: %s" % path) - - site, baseDir, path = self.client.extractPath('http://debian.camrdale.org/dists/unstable/Release') - self.failUnless(site == "debian.camrdale.org:80", "no match: %s" % site) - self.failUnless(baseDir == "", "no match: %s" % baseDir) - self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path) - - def verifyHash(self, found_hash, path, true_hash): - self.failUnless(found_hash.hexexpected() == true_hash, - "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash)) - - def test_findHash(self): - """Tests finding the hash of an index file, binary package, source package, and another index file.""" - # Find the largest index files that are for 'main' - self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n') - self.sourcesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').read().rstrip('\n') - - # Find the Release file corresponding to the found Packages file - for f in os.walk('/var/lib/apt/lists').next()[2]: - if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]): - self.releaseFile = f - break - - # Add all the found files to the mirror - self.client.updatedFile('http://' + self.releaseFile.replace('_','/'), - FilePath('/var/lib/apt/lists/' + self.releaseFile)) - self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + - self.packagesFile[self.packagesFile.find('_dists_')+1:].replace('_','/'), - FilePath('/var/lib/apt/lists/' + self.packagesFile)) - self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + - self.sourcesFile[self.sourcesFile.find('_dists_')+1:].replace('_','/'), - FilePath('/var/lib/apt/lists/' + self.sourcesFile)) - - lastDefer = defer.Deferred() - - # Lookup a Packages.bz2 file - idx_hash = os.popen('grep 
-A 3000 -E "^SHA1:" ' + - '/var/lib/apt/lists/' + self.releaseFile + - ' | grep -E " main/binary-i386/Packages.bz2$"' - ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n') - idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2' - - d = self.client.findHash(idx_path) - d.addCallback(self.verifyHash, idx_path, idx_hash) - - # Lookup the binary 'dpkg' package - pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' + - '/var/lib/apt/lists/' + self.packagesFile + - ' | grep -E "^SHA1:" | head -n 1' + - ' | cut -d\ -f 2').read().rstrip('\n') - pkg_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + \ - os.popen('grep -A 30 -E "^Package: dpkg$" ' + - '/var/lib/apt/lists/' + self.packagesFile + - ' | grep -E "^Filename:" | head -n 1' + - ' | cut -d\ -f 2').read().rstrip('\n') - - d = self.client.findHash(pkg_path) - d.addCallback(self.verifyHash, pkg_path, pkg_hash) - - # Lookup the source 'dpkg' package - src_dir = os.popen('grep -A 30 -E "^Package: dpkg$" ' + - '/var/lib/apt/lists/' + self.sourcesFile + - ' | grep -E "^Directory:" | head -n 1' + - ' | cut -d\ -f 2').read().rstrip('\n') - src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' + - '/var/lib/apt/lists/' + self.sourcesFile + - ' | grep -A 4 -E "^Files:" | grep -E "^ " ' + - ' | cut -d\ -f 2').read().split('\n')[:-1] - src_paths = os.popen('grep -A 20 -E "^Package: dpkg$" ' + - '/var/lib/apt/lists/' + self.sourcesFile + - ' | grep -A 4 -E "^Files:" | grep -E "^ " ' + - ' | cut -d\ -f 4').read().split('\n')[:-1] - - for i in range(len(src_hashes)): - src_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + src_dir + '/' + src_paths[i] - d = self.client.findHash(src_path) - d.addCallback(self.verifyHash, src_path, src_hashes[i]) - - # Lookup a Sources.bz2 file - idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' + - '/var/lib/apt/lists/' + self.releaseFile + - ' | grep -E " main/source/Sources.bz2$"' - ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n') - idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/source/Sources.bz2' - - d = self.client.findHash(idx_path) - d.addCallback(self.verifyHash, idx_path, idx_hash) - - d.addBoth(lastDefer.callback) - return lastDefer - - def tearDown(self): - for p in self.pending_calls: - if p.active(): - p.cancel() - self.client.cleanup() - self.client = None - \ No newline at end of file diff --git a/apt_dht/PeerManager.py b/apt_dht/PeerManager.py deleted file mode 100644 index faa0fe3..0000000 --- a/apt_dht/PeerManager.py +++ /dev/null @@ -1,153 +0,0 @@ - -"""Manage a set of peers and the requests to them.""" - -from random import choice -from urlparse import urlparse, urlunparse -from urllib import quote_plus - -from twisted.internet import reactor, defer -from twisted.python import log -from twisted.trial import unittest -from twisted.web2 import stream as stream_mod -from twisted.web2.http import splitHostPort - -from HTTPDownloader import Peer -from util import uncompact - -class PeerManager: - """Manage a set of peers and the requests to them. - - @type clients: C{dictionary} - @ivar clients: the available peers that have been previously contacted - """ - - def __init__(self): - """Initialize the instance.""" - self.clients = {} - - def get(self, hash, mirror, peers = [], method="GET", modtime=None): - """Download from a list of peers or fallback to a mirror. 
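        Each entry in C{peers} carries a 6-byte compact peer encoding under
        its C{'c'} key, which L{util.uncompact} turns back into an
        (IP, port) pair. A sketch of that packing, assuming the conventional
        4-byte IP plus 2-byte big-endian port layout (the exact format is
        defined in L{util})::

            import socket, struct
            c = socket.inet_aton('10.1.2.3') + struct.pack('!H', 9977)
            # uncompact(c) would then recover ('10.1.2.3', 9977)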
- - @type hash: L{Hash.HashObject} - @param hash: the hash object containing the expected hash for the file - @param mirror: the URI of the file on the mirror - @type peers: C{list} of C{string} - @param peers: a list of the peer info where the file can be found - (optional, defaults to downloading from the mirror) - @type method: C{string} - @param method: the HTTP method to use, 'GET' or 'HEAD' - (optional, defaults to 'GET') - @type modtime: C{int} - @param modtime: the modification time to use for an 'If-Modified-Since' - header, as seconds since the epoch - (optional, defaults to not sending that header) - """ - if peers: - # Choose one of the peers at random - compact_peer = choice(peers) - peer = uncompact(compact_peer['c']) - log.msg('Downloading from peer %r' % (peer, )) - site = peer - path = '/~/' + quote_plus(hash.expected()) - else: - log.msg('Downloading (%s) from mirror %s' % (method, mirror)) - parsed = urlparse(mirror) - assert parsed[0] == "http", "Only HTTP is supported, not '%s'" % parsed[0] - site = splitHostPort(parsed[0], parsed[1]) - path = urlunparse(('', '') + parsed[2:]) - - return self.getPeer(site, path, method, modtime) - - def getPeer(self, site, path, method="GET", modtime=None): - """Create a new peer if necessary and forward the request to it. - - @type site: (C{string}, C{int}) - @param site: the IP address and port of the peer - @type path: C{string} - @param path: the path to the file on the peer - @type method: C{string} - @param method: the HTTP method to use, 'GET' or 'HEAD' - (optional, defaults to 'GET') - @type modtime: C{int} - @param modtime: the modification time to use for an 'If-Modified-Since' - header, as seconds since the epoch - (optional, defaults to not sending that header) - """ - if site not in self.clients: - self.clients[site] = Peer(site[0], site[1]) - return self.clients[site].get(path, method, modtime) - - def close(self): - """Close all the connections to peers.""" - for site in self.clients: - self.clients[site].close() - self.clients = {} - -class TestPeerManager(unittest.TestCase): - """Unit tests for the PeerManager.""" - - manager = None - pending_calls = [] - - def gotResp(self, resp, num, expect): - self.failUnless(resp.code >= 200 and resp.code < 300, "Got a non-200 response: %r" % resp.code) - if expect is not None: - self.failUnless(resp.stream.length == expect, "Length was incorrect, got %r, expected %r" % (resp.stream.length, expect)) - def print_(n): - pass - def printdone(n): - pass - stream_mod.readStream(resp.stream, print_).addCallback(printdone) - - def test_download(self): - """Tests a normal download.""" - self.manager = PeerManager() - self.timeout = 10 - - host = 'www.ietf.org' - d = self.manager.get('', 'http://' + host + '/rfc/rfc0013.txt') - d.addCallback(self.gotResp, 1, 1070) - return d - - def test_head(self): - """Tests a 'HEAD' request.""" - self.manager = PeerManager() - self.timeout = 10 - - host = 'www.ietf.org' - d = self.manager.get('', 'http://' + host + '/rfc/rfc0013.txt', method = "HEAD") - d.addCallback(self.gotResp, 1, 0) - return d - - def test_multiple_downloads(self): - """Tests multiple downloads with queueing and connection closing.""" - self.manager = PeerManager() - self.timeout = 120 - lastDefer = defer.Deferred() - - def newRequest(host, path, num, expect, last=False): - d = self.manager.get('', 'http://' + host + ':' + str(80) + path) - d.addCallback(self.gotResp, num, expect) - if last: - d.addBoth(lastDefer.callback) - - newRequest('www.ietf.org', "/rfc/rfc0006.txt", 1, 1776) - 
newRequest('www.ietf.org', "/rfc/rfc2362.txt", 2, 159833) - newRequest('www.google.ca', "/", 3, None) - self.pending_calls.append(reactor.callLater(1, newRequest, 'www.sfu.ca', '/', 4, None)) - self.pending_calls.append(reactor.callLater(10, newRequest, 'www.ietf.org', '/rfc/rfc0048.txt', 5, 41696)) - self.pending_calls.append(reactor.callLater(30, newRequest, 'www.ietf.org', '/rfc/rfc0022.txt', 6, 4606)) - self.pending_calls.append(reactor.callLater(31, newRequest, 'www.sfu.ca', '/studentcentral/index.html', 7, None)) - self.pending_calls.append(reactor.callLater(32, newRequest, 'www.ietf.org', '/rfc/rfc0014.txt', 8, 27)) - self.pending_calls.append(reactor.callLater(32, newRequest, 'www.ietf.org', '/rfc/rfc0001.txt', 9, 21088)) - self.pending_calls.append(reactor.callLater(62, newRequest, 'www.google.ca', '/intl/en/options/', 0, None, True)) - return lastDefer - - def tearDown(self): - for p in self.pending_calls: - if p.active(): - p.cancel() - self.pending_calls = [] - if self.manager: - self.manager.close() - self.manager = None diff --git a/apt_dht/__init__.py b/apt_dht/__init__.py deleted file mode 100644 index 356924f..0000000 --- a/apt_dht/__init__.py +++ /dev/null @@ -1,47 +0,0 @@ - -"""The main apt-p2p modules. - -To run apt-p2p, you probably want to do something like:: - - from apt_p2p.apt_p2p import AptP2P - myapp = AptP2P(myDHT) - -where myDHT is a DHT that implements interfaces.IDHT. - -Diagram of the interaction between the given modules:: - - +---------------+ +-----------------------------------+ +------------- - | AptP2P | | DHT | | Internet - | |--->|join DHT|----|--\ - | |--->|loadConfig | | | Another - | |--->|getValue | | | Node - | |--->|storeValue DHT|<---|--/ - | |--->|leave | | - | | +-----------------------------------+ | - | | +-------------+ +----------------+ | - | | | PeerManager | | HTTPDownloader*| | - | |--->|get |--->|get HTTP|----|---> Mirror - | | | |--->|getRange | | - | |--->|close |--->|close HTTP|----|--\ - | | +-------------+ +----------------+ | | Another - | | +-----------------------------------+ | | Peer - | | | HTTPServer HTTP|<---|--/ - | |--->|getHTTPFactory | +------------- - |check_freshness|<---| | +------------- - | get_resp|<---| HTTP|<---|HTTP Request - | | +-----------------------------------+ | - | | +---------------+ +--------------+ | Local Net - | | | CacheManager | | ProxyFile- | | (apt) - | |--->|scanDirectories| | Stream* | | - | |--->|save_file |--->|__init__ HTTP|--->|HTTP Response - | |--->|save_error | | | +------------- - | | | | | | +------------- - |new_cached_file|<---| | | file|--->|write file - | | +---------------+ +--------------+ | - | | +---------------+ +--------------+ | Filesystem - | | | MirrorManager | | AptPackages* | | - | |--->|updatedFile |--->|file_updated | | - | |--->|findHash |--->|findHash file|<---|read file - +---------------+ +---------------+ +--------------+ +------------- - -""" diff --git a/apt_dht/apt_dht.py b/apt_dht/apt_dht.py deleted file mode 100644 index 9e360a0..0000000 --- a/apt_dht/apt_dht.py +++ /dev/null @@ -1,369 +0,0 @@ - -"""The main program code. 
- -@var DHT_PIECES: the maximum number of pieces to store with our contact info - in the DHT -@var TORRENT_PIECES: the maximum number of pieces to store as a separate entry - in the DHT -@var download_dir: the name of the directory to use for downloaded files - -""" - -from binascii import b2a_hex -from urlparse import urlunparse -import os, re, sha - -from twisted.internet import defer, reactor -from twisted.web2 import server, http, http_headers, static -from twisted.python import log, failure -from twisted.python.filepath import FilePath - -from apt_p2p_conf import config -from PeerManager import PeerManager -from HTTPServer import TopLevel -from MirrorManager import MirrorManager -from CacheManager import CacheManager -from Hash import HashObject -from db import DB -from util import findMyIPAddr, compact - -DHT_PIECES = 4 -TORRENT_PIECES = 70 - -download_dir = 'cache' - -class AptP2P: - """The main code object that does all of the work. - - Contains all of the sub-components that do all the low-level work, and - coordinates communication between them. - - @type cache_dir: L{twisted.python.filepath.FilePath} - @ivar cache_dir: the directory to use for storing all files - @type db: L{db.DB} - @ivar db: the database to use for tracking files and hashes - @type dht: L{interfaces.IDHT} - @ivar dht: the DHT instance to use - @type http_server: L{HTTPServer.TopLevel} - @ivar http_server: the web server that will handle all requests from apt - and from other peers - @type peers: L{PeerManager.PeerManager} - @ivar peers: the manager of all downloads from mirrors and other peers - @type mirrors: L{MirrorManager.MirrorManager} - @ivar mirrors: the manager of downloaded information about mirrors which - can be queried to get hashes from file names - @type cache: L{CacheManager.CacheManager} - @ivar cache: the manager of all downloaded files - @type my_contact: C{string} - @ivar my_contact: the 6-byte compact peer representation of this peer's - download information (IP address and port) - """ - - def __init__(self, dht): - """Initialize all the sub-components. - - @type dht: L{interfaces.IDHT} - @param dht: the DHT instance to use - """ - log.msg('Initializing the main apt_p2p application') - self.cache_dir = FilePath(config.get('DEFAULT', 'cache_dir')) - if not self.cache_dir.child(download_dir).exists(): - self.cache_dir.child(download_dir).makedirs() - self.db = DB(self.cache_dir.child('apt-p2p.db')) - self.dht = dht - self.dht.loadConfig(config, config.get('DEFAULT', 'DHT')) - self.dht.join().addCallbacks(self.joinComplete, self.joinError) - self.http_server = TopLevel(self.cache_dir.child(download_dir), self.db, self) - self.getHTTPFactory = self.http_server.getHTTPFactory - self.peers = PeerManager() - self.mirrors = MirrorManager(self.cache_dir, config.gettime('DEFAULT', 'UNLOAD_PACKAGES_CACHE')) - other_dirs = [FilePath(f) for f in config.getstringlist('DEFAULT', 'OTHER_DIRS')] - self.cache = CacheManager(self.cache_dir.child(download_dir), self.db, other_dirs, self) - self.my_contact = None - - #{ DHT maintenance - def joinComplete(self, result): - """Complete the DHT join process and determine our download information. - - Called by the DHT when the join has been completed with information - on the external IP address and port of this peer. 
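        The discovered address is then packed by L{util.compact} into the
        6-byte contact form that L{store} later places in the DHT next to
        each file hash.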
- """ - my_addr = findMyIPAddr(result, - config.getint(config.get('DEFAULT', 'DHT'), 'PORT'), - config.getboolean('DEFAULT', 'LOCAL_OK')) - if not my_addr: - raise RuntimeError, "IP address for this machine could not be found" - self.my_contact = compact(my_addr, config.getint('DEFAULT', 'PORT')) - self.cache.scanDirectories() - reactor.callLater(60, self.refreshFiles) - - def joinError(self, failure): - """Joining the DHT has failed.""" - log.msg("joining DHT failed miserably") - log.err(failure) - raise RuntimeError, "IP address for this machine could not be found" - - def refreshFiles(self): - """Refresh any files in the DHT that are about to expire.""" - expireAfter = config.gettime('DEFAULT', 'KEY_REFRESH') - hashes = self.db.expiredHashes(expireAfter) - if len(hashes.keys()) > 0: - log.msg('Refreshing the keys of %d DHT values' % len(hashes.keys())) - self._refreshFiles(None, hashes) - - def _refreshFiles(self, result, hashes): - if result is not None: - log.msg('Storage resulted in: %r' % result) - - if hashes: - raw_hash = hashes.keys()[0] - self.db.refreshHash(raw_hash) - hash = HashObject(raw_hash, pieces = hashes[raw_hash]['pieces']) - del hashes[raw_hash] - storeDefer = self.store(hash) - storeDefer.addBoth(self._refreshFiles, hashes) - else: - reactor.callLater(60, self.refreshFiles) - - #{ Main workflow - def check_freshness(self, req, url, modtime, resp): - """Send a HEAD to the mirror to check if the response from the cache is still valid. - - @type req: L{twisted.web2.http.Request} - @param req: the initial request sent to the HTTP server by apt - @param url: the URI of the actual mirror request - @type modtime: C{int} - @param modtime: the modified time of the cached file (seconds since epoch) - @type resp: L{twisted.web2.http.Response} - @param resp: the response from the cache to be sent to apt - @rtype: L{twisted.internet.defer.Deferred} - @return: a deferred that will be called back with the correct response - """ - log.msg('Checking if %s is still fresh' % url) - d = self.peers.get('', url, method = "HEAD", modtime = modtime) - d.addCallback(self.check_freshness_done, req, url, resp) - return d - - def check_freshness_done(self, resp, req, url, orig_resp): - """Process the returned response from the mirror. - - @type resp: L{twisted.web2.http.Response} - @param resp: the response from the mirror to the HEAD request - @type req: L{twisted.web2.http.Request} - @param req: the initial request sent to the HTTP server by apt - @param url: the URI of the actual mirror request - @type orig_resp: L{twisted.web2.http.Response} - @param orig_resp: the response from the cache to be sent to apt - """ - if resp.code == 304: - log.msg('Still fresh, returning: %s' % url) - return orig_resp - else: - log.msg('Stale, need to redownload: %s' % url) - return self.get_resp(req, url) - - def get_resp(self, req, url): - """Lookup a hash for the file in the local mirror info. - - Starts the process of getting a response to an uncached apt request. 
- - @type req: L{twisted.web2.http.Request} - @param req: the initial request sent to the HTTP server by apt - @param url: the URI of the actual mirror request - @rtype: L{twisted.internet.defer.Deferred} - @return: a deferred that will be called back with the response - """ - d = defer.Deferred() - - log.msg('Trying to find hash for %s' % url) - findDefer = self.mirrors.findHash(url) - - findDefer.addCallbacks(self.findHash_done, self.findHash_error, - callbackArgs=(req, url, d), errbackArgs=(req, url, d)) - findDefer.addErrback(log.err) - return d - - def findHash_error(self, failure, req, url, d): - """Process the error in hash lookup by returning an empty L{HashObject}.""" - log.err(failure) - self.findHash_done(HashObject(), req, url, d) - - def findHash_done(self, hash, req, url, d): - """Use the returned hash to lookup the file in the cache. - - If the hash was not found, the workflow skips down to download from - the mirror (L{lookupHash_done}). - - @type hash: L{Hash.HashObject} - @param hash: the hash object containing the expected hash for the file - """ - if hash.expected() is None: - log.msg('Hash for %s was not found' % url) - self.lookupHash_done([], hash, url, d) - else: - log.msg('Found hash %s for %s' % (hash.hexexpected(), url)) - - # Lookup hash in cache - locations = self.db.lookupHash(hash.expected(), filesOnly = True) - self.getCachedFile(hash, req, url, d, locations) - - def getCachedFile(self, hash, req, url, d, locations): - """Try to return the file from the cache, otherwise move on to a DHT lookup. - - @type locations: C{list} of C{dictionary} - @param locations: the files in the cache that match the hash, - the dictionary contains a key 'path' whose value is a - L{twisted.python.filepath.FilePath} object for the file. - """ - if not locations: - log.msg('Failed to return file from cache: %s' % url) - self.lookupHash(hash, url, d) - return - - # Get the first possible location from the list - file = locations.pop(0)['path'] - log.msg('Returning cached file: %s' % file.path) - - # Get it's response - resp = static.File(file.path).renderHTTP(req) - if isinstance(resp, defer.Deferred): - resp.addBoth(self._getCachedFile, hash, req, url, d, locations) - else: - self._getCachedFile(resp, hash, req, url, d, locations) - - def _getCachedFile(self, resp, hash, req, url, d, locations): - """Check the returned response to be sure it is valid.""" - if isinstance(resp, failure.Failure): - log.msg('Got error trying to get cached file') - log.err() - # Try the next possible location - self.getCachedFile(hash, req, url, d, locations) - return - - log.msg('Cached response: %r' % resp) - - if resp.code >= 200 and resp.code < 400: - d.callback(resp) - else: - # Try the next possible location - self.getCachedFile(hash, req, url, d, locations) - - def lookupHash(self, hash, url, d): - """Lookup the hash in the DHT.""" - log.msg('Looking up hash in DHT for file: %s' % url) - key = hash.expected() - lookupDefer = self.dht.getValue(key) - lookupDefer.addCallback(self.lookupHash_done, hash, url, d) - - def lookupHash_done(self, values, hash, url, d): - """Start the download of the file. - - The download will be from peers if the DHT lookup succeeded, or - from the mirror otherwise. 
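        Peer downloads are additionally filtered through L{check_response},
        so a non-2xx answer from peers still falls back to a direct mirror
        download.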
- - @type values: C{list} of C{dictionary} - @param values: the returned values from the DHT containing peer - download information - """ - if not values: - log.msg('Peers for %s were not found' % url) - getDefer = self.peers.get(hash, url) - getDefer.addCallback(self.cache.save_file, hash, url) - getDefer.addErrback(self.cache.save_error, url) - getDefer.addCallbacks(d.callback, d.errback) - else: - log.msg('Found peers for %s: %r' % (url, values)) - # Download from the found peers - getDefer = self.peers.get(hash, url, values) - getDefer.addCallback(self.check_response, hash, url) - getDefer.addCallback(self.cache.save_file, hash, url) - getDefer.addErrback(self.cache.save_error, url) - getDefer.addCallbacks(d.callback, d.errback) - - def check_response(self, response, hash, url): - """Check the response from peers, and download from the mirror if it is not.""" - if response.code < 200 or response.code >= 300: - log.msg('Download from peers failed, going to direct download: %s' % url) - getDefer = self.peers.get(hash, url) - return getDefer - return response - - def new_cached_file(self, file_path, hash, new_hash, url = None, forceDHT = False): - """Add a newly cached file to the mirror info and/or the DHT. - - If the file was downloaded, set url to the path it was downloaded for. - Doesn't add a file to the DHT unless a hash was found for it - (but does add it anyway if forceDHT is True). - - @type file_path: L{twisted.python.filepath.FilePath} - @param file_path: the location of the file in the local cache - @type hash: L{Hash.HashObject} - @param hash: the original (expected) hash object containing also the - hash of the downloaded file - @type new_hash: C{boolean} - @param new_hash: whether the has was new to this peer, and so should - be added to the DHT - @type url: C{string} - @param url: the URI of the location of the file in the mirror - (optional, defaults to not adding the file to the mirror info) - @type forceDHT: C{boolean} - @param forceDHT: whether to force addition of the file to the DHT - even if the hash was not found in a mirror - (optional, defaults to False) - """ - if url: - self.mirrors.updatedFile(url, file_path) - - if self.my_contact and hash and new_hash and (hash.expected() is not None or forceDHT): - return self.store(hash) - return None - - def store(self, hash): - """Add a key/value pair for the file to the DHT. - - Sets the key and value from the hash information, and tries to add - it to the DHT. 
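        The piece data is stored in one of three ways depending on how many
        pieces there are (illustrative file sizes, assuming the module's
        512 KiB PIECE_SIZE): a 1.5 MiB file has 3 pieces (<= DHT_PIECES), so
        the piece string travels inline with the contact info under C{'t'};
        a 20 MiB file has 40 pieces (<= TORRENT_PIECES), so only the SHA1 of
        the piece string is stored under C{'h'} and the string itself goes
        into a separate DHT key (see L{store_done}); a 100 MiB file has 200
        pieces, so the SHA1 is stored under C{'l'} and peers fetch the piece
        string from our HTTP server instead.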
- """ - key = hash.digest() - value = {'c': self.my_contact} - pieces = hash.pieceDigests() - - # Determine how to store any piece data - if len(pieces) <= 1: - pass - elif len(pieces) <= DHT_PIECES: - # Short enough to be stored with our peer contact info - value['t'] = {'t': ''.join(pieces)} - elif len(pieces) <= TORRENT_PIECES: - # Short enough to be stored in a separate key in the DHT - s = sha.new().update(''.join(pieces)) - value['h'] = s.digest() - else: - # Too long, must be served up by our peer HTTP server - s = sha.new().update(''.join(pieces)) - value['l'] = s.digest() - - storeDefer = self.dht.storeValue(key, value) - storeDefer.addCallback(self.store_done, hash) - return storeDefer - - def store_done(self, result, hash): - """Add a key/value pair for the pieces of the file to the DHT (if necessary).""" - log.msg('Added %s to the DHT: %r' % (hash.hexdigest(), result)) - pieces = hash.pieceDigests() - if len(pieces) > DHT_PIECES and len(pieces) <= TORRENT_PIECES: - # Add the piece data key and value to the DHT - s = sha.new().update(''.join(pieces)) - key = s.digest() - value = {'t': ''.join(pieces)} - - storeDefer = self.dht.storeValue(key, value) - storeDefer.addCallback(self.store_torrent_done, key) - return storeDefer - return result - - def store_torrent_done(self, result, key): - """Adding the file to the DHT is complete, and so is the workflow.""" - log.msg('Added torrent string %s to the DHT: %r' % (b2ahex(key), result)) - return result - \ No newline at end of file diff --git a/apt_dht/apt_dht_conf.py b/apt_dht/apt_dht_conf.py deleted file mode 100644 index aaf2013..0000000 --- a/apt_dht/apt_dht_conf.py +++ /dev/null @@ -1,165 +0,0 @@ - -"""Loading of configuration files and parameters. - -@type version: L{twisted.python.versions.Version} -@var version: the version of this program -@type DEFAULT_CONFIG_FILES: C{list} of C{string} -@var DEFAULT_CONFIG_FILES: the default config files to load (in order) -@var DEFAULTS: the default config parameter values for the main program -@var DHT_DEFAULTS: the default config parameter values for the default DHT - -""" - -import os, sys -from ConfigParser import SafeConfigParser - -from twisted.python import log, versions - -class ConfigError(Exception): - """Errors that occur in the loading of configuration variables.""" - def __init__(self, message): - self.message = message - def __str__(self): - return repr(self.message) - -version = versions.Version('apt-p2p', 0, 0, 0) - -# Set the home parameter -home = os.path.expandvars('${HOME}') -if home == '${HOME}' or not os.path.isdir(home): - home = os.path.expanduser('~') - if not os.path.isdir(home): - home = os.path.abspath(os.path.dirname(sys.argv[0])) - -DEFAULT_CONFIG_FILES=['/etc/apt-p2p/apt-p2p.conf', - home + '/.apt-p2p/apt-p2p.conf'] - -DEFAULTS = { - - # Port to listen on for all requests (TCP and UDP) - 'PORT': '9977', - - # Directory to store the downloaded files in - 'CACHE_DIR': home + '/.apt-p2p/cache', - - # Other directories containing packages to share with others - # WARNING: all files in these directories will be hashed and available - # for everybody to download - 'OTHER_DIRS': """""", - - # User name to try and run as - 'USERNAME': '', - - # Whether it's OK to use an IP addres from a known local/private range - 'LOCAL_OK': 'no', - - # Unload the packages cache after an interval of inactivity this long. - # The packages cache uses a lot of memory, and only takes a few seconds - # to reload when a new request arrives. 
diff --git a/apt_dht/apt_dht_conf.py b/apt_dht/apt_dht_conf.py
deleted file mode 100644
index aaf2013..0000000
--- a/apt_dht/apt_dht_conf.py
+++ /dev/null
@@ -1,165 +0,0 @@
-
-"""Loading of configuration files and parameters.
-
-@type version: L{twisted.python.versions.Version}
-@var version: the version of this program
-@type DEFAULT_CONFIG_FILES: C{list} of C{string}
-@var DEFAULT_CONFIG_FILES: the default config files to load (in order)
-@var DEFAULTS: the default config parameter values for the main program
-@var DHT_DEFAULTS: the default config parameter values for the default DHT
-
-"""
-
-import os, sys
-from ConfigParser import SafeConfigParser
-
-from twisted.python import log, versions
-
-class ConfigError(Exception):
-    """Errors that occur in the loading of configuration variables."""
-    def __init__(self, message):
-        self.message = message
-    def __str__(self):
-        return repr(self.message)
-
-version = versions.Version('apt-p2p', 0, 0, 0)
-
-# Set the home parameter
-home = os.path.expandvars('${HOME}')
-if home == '${HOME}' or not os.path.isdir(home):
-    home = os.path.expanduser('~')
-    if not os.path.isdir(home):
-        home = os.path.abspath(os.path.dirname(sys.argv[0]))
-
-DEFAULT_CONFIG_FILES=['/etc/apt-p2p/apt-p2p.conf',
-                      home + '/.apt-p2p/apt-p2p.conf']
-
-DEFAULTS = {
-
-    # Port to listen on for all requests (TCP and UDP)
-    'PORT': '9977',
-
-    # Directory to store the downloaded files in
-    'CACHE_DIR': home + '/.apt-p2p/cache',
-
-    # Other directories containing packages to share with others
-    # WARNING: all files in these directories will be hashed and available
-    # for everybody to download
-    'OTHER_DIRS': """""",
-
-    # User name to try and run as
-    'USERNAME': '',
-
-    # Whether it's OK to use an IP address from a known local/private range
-    'LOCAL_OK': 'no',
-
-    # Unload the packages cache after an interval of inactivity this long.
-    # The packages cache uses a lot of memory, and only takes a few seconds
-    # to reload when a new request arrives.
-    'UNLOAD_PACKAGES_CACHE': '5m',
-
-    # Refresh the DHT keys after this much time has passed.
-    # This should be a time slightly less than the DHT's KEY_EXPIRE value.
-    'KEY_REFRESH': '57m',
-
-    # Which DHT implementation to use.
-    # It must be possible to do "from <DHT>.DHT import DHT" to get a class that
-    # implements the IDHT interface.
-    'DHT': 'apt_p2p_Khashmir',
-
-    # Whether to only run the DHT (for providing only a bootstrap node)
-    'DHT-ONLY': 'no',
-}
-
-DHT_DEFAULTS = {
-    # bootstrap nodes to contact to join the DHT
-    'BOOTSTRAP': """www.camrdale.org:9977
-                    steveholt.hopto.org:9976""",
-
-    # whether this node is a bootstrap node
-    'BOOTSTRAP_NODE': "no",
-
-    # Kademlia "K" constant, this should be an even number
-    'K': '8',
-
-    # SHA1 is 160 bits long
-    'HASH_LENGTH': '160',
-
-    # checkpoint every this many seconds
-    'CHECKPOINT_INTERVAL': '5m', # five minutes
-
-    ### SEARCHING/STORING
-    # concurrent xmlrpc calls per find node/value request!
-    'CONCURRENT_REQS': '4',
-
-    # how many hosts to post to
-    'STORE_REDUNDANCY': '3',
-
-    # How many values to attempt to retrieve from the DHT.
-    # Setting this to 0 will try and get all values (which could take a while if
-    # a lot of nodes have values). Setting it negative will try to get that
-    # number of results from only the closest STORE_REDUNDANCY nodes to the hash.
-    # The default is a large negative number so all values from the closest
-    # STORE_REDUNDANCY nodes will be retrieved.
-    'RETRIEVE_VALUES': '-10000',
-
-    ### ROUTING TABLE STUFF
-    # how many times in a row a node can fail to respond before it's booted from the routing table
-    'MAX_FAILURES': '3',
-
-    # never ping a node more often than this
-    'MIN_PING_INTERVAL': '15m', # fifteen minutes
-
-    # refresh buckets that haven't been touched in this long
-    'BUCKET_STALENESS': '1h', # one hour
-
-    # expire entries older than this
-    'KEY_EXPIRE': '1h', # 60 minutes
-
-    # whether to spew info about the requests/responses in the protocol
-    'SPEW': 'yes',
-}
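All the duration values above ('5m', '57m', '1h') use the suffix convention implemented by gettime() in the parser class that follows; '57m' parses to 57*60 = 3420 seconds, deliberately a little under the 3600-second KEY_EXPIRE so keys are republished before they expire. The parsing rule, in isolation:

    TIME_MULTIPLIERS = {'s': 1, 'm': 60, 'h': 3600, 'd': 86400}

    def parse_time(value):
        """Parse '57m' -> 3420, '1h' -> 3600, a plain '300' -> 300."""
        suffix = value[-1].lower()
        if suffix in TIME_MULTIPLIERS:
            return int(value[:-1]) * TIME_MULTIPLIERS[suffix]
        return int(value)

    assert parse_time('57m') == 3420
    assert parse_time('1h') == 3600
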
-class AptP2PConfigParser(SafeConfigParser):
-    """Adds 'gettime' and 'getstringlist' to ConfigParser objects.
-
-    @ivar time_multipliers: the 'gettime' suffixes and the multipliers needed
-        to convert them to seconds
-    """
-
-    time_multipliers={
-        's': 1,    #seconds
-        'm': 60,   #minutes
-        'h': 3600, #hours
-        'd': 86400,#days
-        }
-
-    def gettime(self, section, option):
-        """Read the config parameter as a time value."""
-        mult = 1
-        value = self.get(section, option)
-        if len(value) == 0:
-            raise ConfigError("Configuration parse error: [%s] %s" % (section, option))
-        suffix = value[-1].lower()
-        if suffix in self.time_multipliers.keys():
-            mult = self.time_multipliers[suffix]
-            value = value[:-1]
-        return int(value)*mult
-
-    def getstring(self, section, option):
-        """Read the config parameter as a string."""
-        return self.get(section,option)
-
-    def getstringlist(self, section, option):
-        """Read the multi-line config parameter as a list of strings."""
-        return self.get(section,option).split()
-
-    def optionxform(self, option):
-        """Use all uppercase for the config parameter names."""
-        return option.upper()
-
-# Initialize the default config parameters
-config = AptP2PConfigParser(DEFAULTS)
-config.add_section(config.get('DEFAULT', 'DHT'))
-for k in DHT_DEFAULTS:
-    config.set(config.get('DEFAULT', 'DHT'), k, DHT_DEFAULTS[k])
diff --git a/apt_dht/db.py b/apt_dht/db.py
deleted file mode 100644
index 396f419..0000000
--- a/apt_dht/db.py
+++ /dev/null
@@ -1,421 +0,0 @@
-
-"""An sqlite database for storing persistent files and hashes."""
-
-from datetime import datetime, timedelta
-from pysqlite2 import dbapi2 as sqlite
-from binascii import a2b_base64, b2a_base64
-from time import sleep
-import os, sha
-
-from twisted.python.filepath import FilePath
-from twisted.trial import unittest
-
-assert sqlite.version_info >= (2, 1)
-
-class DBExcept(Exception):
-    """An error occurred in accessing the database."""
-    pass
-
-class khash(str):
-    """Dummy class to convert all hashes to base64 for storing in the DB."""
-
-# Initialize the database to work with 'khash' objects (binary strings)
-sqlite.register_adapter(khash, b2a_base64)
-sqlite.register_converter("KHASH", a2b_base64)
-sqlite.register_converter("khash", a2b_base64)
-sqlite.enable_callback_tracebacks(True)
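The khash subclass and the register_adapter/register_converter calls above make binary SHA1 digests safe to store in sqlite by transparently base64-encoding them on the way in and decoding them on the way out; only values wrapped in khash() are encoded. A minimal round-trip sketch under the same registrations (pysqlite2 is the module this file itself uses):

    from binascii import a2b_base64, b2a_base64
    from pysqlite2 import dbapi2 as sqlite

    class khash(str):
        pass  # marker type: only khash() values get base64-encoded

    sqlite.register_adapter(khash, b2a_base64)
    sqlite.register_converter("KHASH", a2b_base64)

    conn = sqlite.connect(':memory:', detect_types=sqlite.PARSE_DECLTYPES)
    conn.execute("CREATE TABLE t (hash KHASH)")
    digest = '\x00\x01\xfe' * 7   # stand-in for a binary SHA1 digest
    conn.execute("INSERT INTO t VALUES (?)", (khash(digest),))
    assert conn.execute("SELECT hash FROM t").fetchone()[0] == digest
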
-class DB:
-    """An sqlite database for storing persistent files and hashes.
-
-    @type db: L{twisted.python.filepath.FilePath}
-    @ivar db: the database file to use
-    @type conn: L{pysqlite2.dbapi2.Connection}
-    @ivar conn: an open connection to the sqlite database
-    """
-
-    def __init__(self, db):
-        """Load or create the database file.
-
-        @type db: L{twisted.python.filepath.FilePath}
-        @param db: the database file to use
-        """
-        self.db = db
-        self.db.restat(False)
-        if self.db.exists():
-            self._loadDB()
-        else:
-            self._createNewDB()
-        self.conn.text_factory = str
-        self.conn.row_factory = sqlite.Row
-
-    def _loadDB(self):
-        """Open a new connection to the existing database file."""
-        try:
-            self.conn = sqlite.connect(database=self.db.path, detect_types=sqlite.PARSE_DECLTYPES)
-        except:
-            import traceback
-            raise DBExcept("Couldn't open DB: %s" % traceback.format_exc())
-
-    def _createNewDB(self):
-        """Open a connection to a new database and create the necessary tables."""
-        if not self.db.parent().exists():
-            self.db.parent().makedirs()
-        self.conn = sqlite.connect(database=self.db.path, detect_types=sqlite.PARSE_DECLTYPES)
-        c = self.conn.cursor()
-        c.execute("CREATE TABLE files (path TEXT PRIMARY KEY UNIQUE, hashID INTEGER, " +
-                  "size NUMBER, mtime NUMBER)")
-        c.execute("CREATE TABLE hashes (hashID INTEGER PRIMARY KEY AUTOINCREMENT, " +
-                  "hash KHASH UNIQUE, pieces KHASH, " +
-                  "piecehash KHASH, refreshed TIMESTAMP)")
-        c.execute("CREATE INDEX hashes_refreshed ON hashes(refreshed)")
-        c.execute("CREATE INDEX hashes_piecehash ON hashes(piecehash)")
-        c.close()
-        self.conn.commit()
-
-    def _removeChanged(self, file, row):
-        """If the file has changed or is missing, remove it from the DB.
-
-        @type file: L{twisted.python.filepath.FilePath}
-        @param file: the file to check
-        @type row: C{dictionary}-like object
-        @param row: contains the expected 'size' and 'mtime' of the file
-        @rtype: C{boolean}
-        @return: True if the file is unchanged, False if it is changed,
-            and None if it is missing
-        """
-        res = None
-        if row:
-            file.restat(False)
-            if file.exists():
-                # Compare the current with the expected file properties
-                res = (row['size'] == file.getsize() and row['mtime'] == file.getmtime())
-            if not res:
-                # Remove the file from the database
-                c = self.conn.cursor()
-                c.execute("DELETE FROM files WHERE path = ?", (file.path, ))
-                self.conn.commit()
-                c.close()
-        return res
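_removeChanged() above encodes the database's freshness rule: a cached entry is only trusted while the file's current size and mtime both match what was recorded. The same comparison in a standalone form, with a hypothetical record dict standing in for the DB row:

    import os

    def is_unchanged(path, record):
        """True if size and mtime still match the recorded values,
        False if the file changed, None if it is missing."""
        if not os.path.exists(path):
            return None
        st = os.stat(path)
        return (record['size'] == st.st_size and
                record['mtime'] == st.st_mtime)
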
-    def storeFile(self, file, hash, pieces = ''):
-        """Store or update a file in the database.
-
-        @type file: L{twisted.python.filepath.FilePath}
-        @param file: the file to check
-        @type hash: C{string}
-        @param hash: the hash of the file
-        @type pieces: C{string}
-        @param pieces: the concatenated list of the hashes of the pieces of
-            the file (optional, defaults to the empty string)
-        @return: True if the hash was not in the database before
-            (so it needs to be added to the DHT)
-        """
-        # Hash the pieces to get the piecehash
-        piecehash = ''
-        if pieces:
-            piecehash = sha.new(pieces).digest()
-
-        # Check the database for the hash
-        c = self.conn.cursor()
-        c.execute("SELECT hashID, piecehash FROM hashes WHERE hash = ?", (khash(hash), ))
-        row = c.fetchone()
-        if row:
-            assert piecehash == row['piecehash']
-            new_hash = False
-            hashID = row['hashID']
-        else:
-            # Add the new hash to the database
-            c = self.conn.cursor()
-            c.execute("INSERT OR REPLACE INTO hashes (hash, pieces, piecehash, refreshed) VALUES (?, ?, ?, ?)",
-                      (khash(hash), khash(pieces), khash(piecehash), datetime.now()))
-            self.conn.commit()
-            new_hash = True
-            hashID = c.lastrowid
-
-        # Add the file to the database
-        file.restat()
-        c.execute("INSERT OR REPLACE INTO files (path, hashID, size, mtime) VALUES (?, ?, ?, ?)",
-                  (file.path, hashID, file.getsize(), file.getmtime()))
-        self.conn.commit()
-        c.close()
-
-        return new_hash
-
-    def getFile(self, file):
-        """Get a file from the database.
-
-        If it has changed or is missing, it is removed from the database.
-
-        @type file: L{twisted.python.filepath.FilePath}
-        @param file: the file to check
-        @return: dictionary of info for the file, False if changed, or
-            None if not in database or missing
-        """
-        c = self.conn.cursor()
-        c.execute("SELECT hash, size, mtime, pieces FROM files JOIN hashes USING (hashID) WHERE path = ?", (file.path, ))
-        row = c.fetchone()
-        res = None
-        if row:
-            res = self._removeChanged(file, row)
-            if res:
-                res = {}
-                res['hash'] = row['hash']
-                res['size'] = row['size']
-                res['pieces'] = row['pieces']
-        c.close()
-        return res
-
-    def lookupHash(self, hash, filesOnly = False):
-        """Find a file by hash in the database.
-
-        If any found files have changed or are missing, they are removed
-        from the database. If filesOnly is False then it will also look for
-        piece string hashes if no files can be found.
-
-        @return: list of dictionaries of info for the found files
-        """
-        # Try to find the hash in the files table
-        c = self.conn.cursor()
-        c.execute("SELECT path, size, mtime, refreshed, pieces FROM files JOIN hashes USING (hashID) WHERE hash = ?", (khash(hash), ))
-        row = c.fetchone()
-        files = []
-        while row:
-            # Save the file to the list of found files
-            file = FilePath(row['path'])
-            res = self._removeChanged(file, row)
-            if res:
-                res = {}
-                res['path'] = file
-                res['size'] = row['size']
-                res['refreshed'] = row['refreshed']
-                res['pieces'] = row['pieces']
-                files.append(res)
-            row = c.fetchone()
-
-        if not filesOnly and not files:
-            # No files were found, so check the piecehashes as well
-            c.execute("SELECT refreshed, pieces, piecehash FROM hashes WHERE piecehash = ?", (khash(hash), ))
-            row = c.fetchone()
-            if row:
-                res = {}
-                res['refreshed'] = row['refreshed']
-                res['pieces'] = row['pieces']
-                files.append(res)
-
-        c.close()
-        return files
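Note the two-stage search in lookupHash() above: the requested hash is first matched against whole-file hashes, and only if nothing valid is found there, and filesOnly is False, is it retried against the piecehash column, so a request for a piece-hash string published by store_done() can still be answered. The control flow, reduced to dictionaries standing in for the two SELECTs:

    def lookup_hash(hash, files_by_hash, pieces_by_piecehash, files_only=False):
        # files_by_hash / pieces_by_piecehash are stand-ins for the
        # "files JOIN hashes" and "hashes WHERE piecehash = ?" queries.
        results = list(files_by_hash.get(hash, []))
        if not results and not files_only:
            results = list(pieces_by_piecehash.get(hash, []))
        return results
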
-    def isUnchanged(self, file):
-        """Check if a file in the file system has changed.
-
-        If it has changed, it is removed from the database.
-
-        @return: True if unchanged, False if changed, None if not in database
-        """
-        c = self.conn.cursor()
-        c.execute("SELECT size, mtime FROM files WHERE path = ?", (file.path, ))
-        row = c.fetchone()
-        return self._removeChanged(file, row)
-
-    def refreshHash(self, hash):
-        """Refresh the publishing time of a hash."""
-        c = self.conn.cursor()
-        c.execute("UPDATE hashes SET refreshed = ? WHERE hash = ?", (datetime.now(), khash(hash)))
-        self.conn.commit()
-        c.close()
-
-    def expiredHashes(self, expireAfter):
-        """Find files that need refreshing after expireAfter seconds.
-
-        For each hash that needs refreshing, finds all the files with that hash.
-        If the file has changed or is missing, it is removed from the table.
-
-        @return: dictionary with keys the hashes, values a list of FilePaths
-        """
-        t = datetime.now() - timedelta(seconds=expireAfter)
-
-        # Find all the hashes that need refreshing
-        c = self.conn.cursor()
-        c.execute("SELECT hashID, hash, pieces FROM hashes WHERE refreshed < ?", (t, ))
-        row = c.fetchone()
-        expired = {}
-        while row:
-            res = expired.setdefault(row['hash'], {})
-            res['hashID'] = row['hashID']
-            res['hash'] = row['hash']
-            res['pieces'] = row['pieces']
-            row = c.fetchone()
-
-        # Make sure there are still valid files for each hash
-        for hash in expired.values():
-            valid = False
-            c.execute("SELECT path, size, mtime FROM files WHERE hashID = ?", (hash['hashID'], ))
-            row = c.fetchone()
-            while row:
-                res = self._removeChanged(FilePath(row['path']), row)
-                if res:
-                    valid = True
-                row = c.fetchone()
-            if not valid:
-                # Remove hashes for which no files are still available
-                del expired[hash['hash']]
-                c.execute("DELETE FROM hashes WHERE hashID = ?", (hash['hashID'], ))
-
-        self.conn.commit()
-        c.close()
-
-        return expired
-
-    def removeUntrackedFiles(self, dirs):
-        """Remove files that are no longer tracked by the program.
-
-        @type dirs: C{list} of L{twisted.python.filepath.FilePath}
-        @param dirs: a list of the directories that we are tracking
-        @return: list of files that were removed
-        """
-        assert len(dirs) >= 1
-
-        # Create a list of globs and an SQL statement for the directories
-        newdirs = []
-        sql = "WHERE"
-        for dir in dirs:
-            newdirs.append(dir.child('*').path)
-            sql += " path NOT GLOB ? AND"
-        sql = sql[:-4]
-
-        # Get a listing of all the files that will be removed
-        c = self.conn.cursor()
-        c.execute("SELECT path FROM files " + sql, newdirs)
-        row = c.fetchone()
-        removed = []
-        while row:
-            removed.append(FilePath(row['path']))
-            row = c.fetchone()
-
-        # Delete all the removed files from the database
-        if removed:
-            c.execute("DELETE FROM files " + sql, newdirs)
-            self.conn.commit()
-
-        return removed
-
-    def close(self):
-        """Close the database connection."""
-        self.conn.close()
-
-class TestDB(unittest.TestCase):
-    """Tests for the khashmir database."""
-
-    timeout = 5
-    db = FilePath('/tmp/khashmir.db')
-    hash = '\xca\xec\xb8\x0c\x00\xe7\x07\xf8~])\x8f\x9d\xe5_B\xff\x1a\xc4!'
- directory = FilePath('/tmp/apt-p2p/') - file = FilePath('/tmp/apt-p2p/khashmir.test') - testfile = 'tmp/khashmir.test' - dirs = [FilePath('/tmp/apt-p2p/top1'), - FilePath('/tmp/apt-p2p/top2/sub1'), - FilePath('/tmp/apt-p2p/top2/sub2/')] - - def setUp(self): - if not self.file.parent().exists(): - self.file.parent().makedirs() - self.file.setContent('fgfhds') - self.file.touch() - self.store = DB(self.db) - self.store.storeFile(self.file, self.hash) - - def test_openExistingDB(self): - """Tests opening an existing database.""" - self.store.close() - self.store = None - sleep(1) - self.store = DB(self.db) - res = self.store.isUnchanged(self.file) - self.failUnless(res) - - def test_getFile(self): - """Tests retrieving a file from the database.""" - res = self.store.getFile(self.file) - self.failUnless(res) - self.failUnlessEqual(res['hash'], self.hash) - - def test_lookupHash(self): - """Tests looking up a hash in the database.""" - res = self.store.lookupHash(self.hash) - self.failUnless(res) - self.failUnlessEqual(len(res), 1) - self.failUnlessEqual(res[0]['path'].path, self.file.path) - - def test_isUnchanged(self): - """Tests checking if a file in the database is unchanged.""" - res = self.store.isUnchanged(self.file) - self.failUnless(res) - sleep(2) - self.file.touch() - res = self.store.isUnchanged(self.file) - self.failUnless(res == False) - res = self.store.isUnchanged(self.file) - self.failUnless(res is None) - - def test_expiry(self): - """Tests retrieving the files from the database that have expired.""" - res = self.store.expiredHashes(1) - self.failUnlessEqual(len(res.keys()), 0) - sleep(2) - res = self.store.expiredHashes(1) - self.failUnlessEqual(len(res.keys()), 1) - self.failUnlessEqual(res.keys()[0], self.hash) - self.store.refreshHash(self.hash) - res = self.store.expiredHashes(1) - self.failUnlessEqual(len(res.keys()), 0) - - def build_dirs(self): - for dir in self.dirs: - file = dir.preauthChild(self.testfile) - if not file.parent().exists(): - file.parent().makedirs() - file.setContent(file.path) - file.touch() - self.store.storeFile(file, self.hash) - - def test_multipleHashes(self): - """Tests looking up a hash with multiple files in the database.""" - self.build_dirs() - res = self.store.expiredHashes(1) - self.failUnlessEqual(len(res.keys()), 0) - res = self.store.lookupHash(self.hash) - self.failUnless(res) - self.failUnlessEqual(len(res), 4) - self.failUnlessEqual(res[0]['refreshed'], res[1]['refreshed']) - self.failUnlessEqual(res[0]['refreshed'], res[2]['refreshed']) - self.failUnlessEqual(res[0]['refreshed'], res[3]['refreshed']) - sleep(2) - res = self.store.expiredHashes(1) - self.failUnlessEqual(len(res.keys()), 1) - self.failUnlessEqual(res.keys()[0], self.hash) - self.store.refreshHash(self.hash) - res = self.store.expiredHashes(1) - self.failUnlessEqual(len(res.keys()), 0) - - def test_removeUntracked(self): - """Tests removing untracked files from the database.""" - self.build_dirs() - res = self.store.removeUntrackedFiles(self.dirs) - self.failUnlessEqual(len(res), 1, 'Got removed paths: %r' % res) - self.failUnlessEqual(res[0], self.file, 'Got removed paths: %r' % res) - res = self.store.removeUntrackedFiles(self.dirs) - self.failUnlessEqual(len(res), 0, 'Got removed paths: %r' % res) - res = self.store.removeUntrackedFiles(self.dirs[1:]) - self.failUnlessEqual(len(res), 1, 'Got removed paths: %r' % res) - self.failUnlessEqual(res[0], self.dirs[0].preauthChild(self.testfile), 'Got removed paths: %r' % res) - res = 
self.store.removeUntrackedFiles(self.dirs[:1])
-        self.failUnlessEqual(len(res), 2, 'Got removed paths: %r' % res)
-        self.failUnlessIn(self.dirs[1].preauthChild(self.testfile), res, 'Got removed paths: %r' % res)
-        self.failUnlessIn(self.dirs[2].preauthChild(self.testfile), res, 'Got removed paths: %r' % res)
-
-    def tearDown(self):
-        self.directory.remove()
-        self.store.close()
-        self.db.remove()
-
diff --git a/apt_dht/interfaces.py b/apt_dht/interfaces.py
deleted file mode 100644
index b38de39..0000000
--- a/apt_dht/interfaces.py
+++ /dev/null
@@ -1,43 +0,0 @@
-
-"""Some interfaces that are used by the apt-p2p classes."""
-
-from zope.interface import Interface
-
-class IDHT(Interface):
-    """An abstract interface for using a DHT implementation."""
-
-    def loadConfig(self, config, section):
-        """Load the DHT's configuration from a config parser.
-
-        @type config: C{SafeConfigParser}
-        @param config: the configuration parser holding the config values
-        """
-
-    def join(self):
-        """Bootstrap the new DHT node into the DHT.
-
-        @rtype: C{Deferred}
-        @return: a deferred that will fire when the node has joined
-        """
-
-    def leave(self):
-        """Depart gracefully from the DHT.
-
-        @rtype: C{Deferred}
-        @return: a deferred that will fire when the node has left
-        """
-
-    def getValue(self, key):
-        """Get a value from the DHT for the specified key.
-
-        The length of the key may be adjusted for use with the DHT.
-
-        @rtype: C{Deferred}
-        @return: a deferred that will fire with the stored values
-        """
-
-    def storeValue(self, key, value):
-        """Store a value in the DHT for the specified key.
-
-        The length of the key may be adjusted for use with the DHT.
-        """
diff --git a/apt_dht/policies.py b/apt_dht/policies.py
deleted file mode 100644
index e7bae81..0000000
--- a/apt_dht/policies.py
+++ /dev/null
@@ -1,702 +0,0 @@
-# -*- test-case-name: twisted.test.test_policies -*-
-# Copyright (c) 2001-2007 Twisted Matrix Laboratories.
-# See LICENSE for details.
-
-
-"""
-Resource limiting policies.
-
-@seealso: See also L{twisted.protocols.htb} for rate limiting.
-"""
-
-# system imports
-import sys, operator
-
-# twisted imports
-from twisted.internet.protocol import ServerFactory, Protocol, ClientFactory
-from twisted.internet import reactor, error
-from twisted.python import log
-from zope.interface import providedBy, directlyProvides
-
-
-class ProtocolWrapper(Protocol):
-    """Wraps protocol instances and acts as their transport as well."""
-
-    disconnecting = 0
-
-    def __init__(self, factory, wrappedProtocol):
-        self.wrappedProtocol = wrappedProtocol
-        self.factory = factory
-
-    def makeConnection(self, transport):
-        directlyProvides(self, *providedBy(self) + providedBy(transport))
-        Protocol.makeConnection(self, transport)
-
-    # Transport relaying
-
-    def write(self, data):
-        self.transport.write(data)
-
-    def writeSequence(self, data):
-        self.transport.writeSequence(data)
-
-    def loseConnection(self):
-        self.disconnecting = 1
-        self.transport.loseConnection()
-
-    def getPeer(self):
-        return self.transport.getPeer()
-
-    def getHost(self):
-        return self.transport.getHost()
-
-    def registerProducer(self, producer, streaming):
-        self.transport.registerProducer(producer, streaming)
-
-    def unregisterProducer(self):
-        self.transport.unregisterProducer()
-
-    def stopConsuming(self):
-        self.transport.stopConsuming()
-
-    def __getattr__(self, name):
-        return getattr(self.transport, name)
-
-    # Protocol relaying
-
-    def connectionMade(self):
-        self.factory.registerProtocol(self)
-        self.wrappedProtocol.makeConnection(self)
-
-    def dataReceived(self, data):
-        self.wrappedProtocol.dataReceived(data)
-
-    def connectionLost(self, reason):
-        self.factory.unregisterProtocol(self)
-        self.wrappedProtocol.connectionLost(reason)
-
-
-class WrappingFactory(ClientFactory):
-    """Wraps a factory and its protocols, and keeps track of them."""
-
-    protocol = ProtocolWrapper
-
-    def __init__(self, wrappedFactory):
-        self.wrappedFactory = wrappedFactory
-        self.protocols = {}
-
-    def doStart(self):
-        self.wrappedFactory.doStart()
-        ClientFactory.doStart(self)
-
-    def doStop(self):
-        self.wrappedFactory.doStop()
-        ClientFactory.doStop(self)
-
-    def startedConnecting(self, connector):
-        self.wrappedFactory.startedConnecting(connector)
-
-    def clientConnectionFailed(self, connector, reason):
-        self.wrappedFactory.clientConnectionFailed(connector, reason)
-
-    def clientConnectionLost(self, connector, reason):
-        self.wrappedFactory.clientConnectionLost(connector, reason)
-
-    def buildProtocol(self, addr):
-        return self.protocol(self, self.wrappedFactory.buildProtocol(addr))
-
-    def registerProtocol(self, p):
-        """Called by protocol to register itself."""
-        self.protocols[p] = 1
-
-    def unregisterProtocol(self, p):
-        """Called by protocols when they go away."""
-        del self.protocols[p]
-
-
-class ThrottlingProtocol(ProtocolWrapper):
-    """Protocol for ThrottlingFactory."""
-
-    # wrap API for tracking bandwidth
-
-    def __init__(self, factory, wrappedProtocol):
-        ProtocolWrapper.__init__(self, factory, wrappedProtocol)
-        self._tempDataBuffer = []
-        self._tempDataLength = 0
-        self.throttled = False
-
-    def write(self, data):
-        # Check if we can write
-        if not self.throttled:
-            paused = self.factory.registerWritten(len(data))
-            if not paused:
-                ProtocolWrapper.write(self, data)
-
-                if paused is not None and hasattr(self, "producer") and self.producer and not self.producer.paused:
-                    # Interrupt the flow so that others can have a chance
-                    # We can only do this if it's not already paused otherwise we
-                    # risk unpausing something that the Server paused
-
self.producer.pauseProducing() - reactor.callLater(0, self.producer.resumeProducing) - - if self.throttled or paused: - # Can't write, buffer the data - self._tempDataBuffer.append(data) - self._tempDataLength += len(data) - self._throttleWrites() - - def writeSequence(self, seq): - if not self.throttled: - # Write each sequence separately - while seq and not self.factory.registerWritten(len(seq[0])): - ProtocolWrapper.write(self, seq.pop(0)) - - # If there's some left, we must have been paused - if seq: - self._tempDataBuffer.extend(seq) - self._tempDataLength += reduce(operator.add, map(len, seq)) - self._throttleWrites() - - def dataReceived(self, data): - self.factory.registerRead(len(data)) - ProtocolWrapper.dataReceived(self, data) - - def registerProducer(self, producer, streaming): - assert streaming, "You can only use the ThrottlingProtocol with streaming (push) producers." - self.producer = producer - ProtocolWrapper.registerProducer(self, producer, streaming) - - def unregisterProducer(self): - del self.producer - ProtocolWrapper.unregisterProducer(self) - - - def throttleReads(self): - self.transport.pauseProducing() - - def unthrottleReads(self): - self.transport.resumeProducing() - - def _throttleWrites(self): - # If we haven't yet, queue for unthrottling - if not self.throttled: - self.throttled = True - self.factory.throttledWrites(self) - - if hasattr(self, "producer") and self.producer: - self.producer.pauseProducing() - - def unthrottleWrites(self): - # Write some data - if self._tempDataBuffer: - assert not self.factory.registerWritten(len(self._tempDataBuffer[0])) - self._tempDataLength -= len(self._tempDataBuffer[0]) - ProtocolWrapper.write(self, self._tempDataBuffer.pop(0)) - assert self._tempDataLength >= 0 - - # If we wrote it all, start producing more - if not self._tempDataBuffer: - assert self._tempDataLength == 0 - self.throttled = False - if hasattr(self, "producer") and self.producer: - # This might unpause something the Server has also paused, but - # it will get paused again on first write anyway - reactor.callLater(0, self.producer.resumeProducing) - - return self._tempDataLength - - -class ThrottlingFactory(WrappingFactory): - """ - Throttles bandwidth and number of connections. - - Write bandwidth will only be throttled if there is a producer - registered. - """ - - protocol = ThrottlingProtocol - CHUNK_SIZE = 4*1024 - - def __init__(self, wrappedFactory, maxConnectionCount=sys.maxint, - readLimit=None, writeLimit=None): - WrappingFactory.__init__(self, wrappedFactory) - self.connectionCount = 0 - self.maxConnectionCount = maxConnectionCount - self.readLimit = readLimit # max bytes we should read per second - self.writeLimit = writeLimit # max bytes we should write per second - self.readThisSecond = 0 - self.writeAvailable = writeLimit - self._writeQueue = [] - self.unthrottleReadsID = None - self.checkReadBandwidthID = None - self.unthrottleWritesID = None - self.checkWriteBandwidthID = None - - - def callLater(self, period, func): - """ - Wrapper around L{reactor.callLater} for test purpose. - """ - return reactor.callLater(period, func) - - - def registerWritten(self, length): - """ - Called by protocol to tell us more bytes were written. - Returns True if the bytes could not be written and the protocol should pause itself. 
- """ - # Check if there are bytes available to write - if self.writeLimit is None: - return None - elif self.writeAvailable > 0: - self.writeAvailable -= length - return False - - return True - - - def throttledWrites(self, p): - """ - Called by the protocol to queue it for later writing. - """ - assert p not in self._writeQueue - self._writeQueue.append(p) - - - def registerRead(self, length): - """ - Called by protocol to tell us more bytes were read. - """ - self.readThisSecond += length - - - def checkReadBandwidth(self): - """ - Checks if we've passed bandwidth limits. - """ - if self.readThisSecond > self.readLimit: - self.throttleReads() - throttleTime = (float(self.readThisSecond) / self.readLimit) - 1.0 - self.unthrottleReadsID = self.callLater(throttleTime, - self.unthrottleReads) - self.readThisSecond = 0 - self.checkReadBandwidthID = self.callLater(1, self.checkReadBandwidth) - - - def checkWriteBandwidth(self): - """ - Add some new available bandwidth, and check for protocols to unthrottle. - """ - # Increase the available write bytes, but not higher than the limit - self.writeAvailable = min(self.writeLimit, self.writeAvailable + self.writeLimit) - - # Write from the queue until it's empty or we're throttled again - while self.writeAvailable > 0 and self._writeQueue: - # Get the first queued protocol - p = self._writeQueue.pop(0) - _tempWriteAvailable = self.writeAvailable - bytesLeft = 1 - - # Unthrottle writes until CHUNK_SIZE is reached or the protocol is unbuffered - while self.writeAvailable > 0 and _tempWriteAvailable - self.writeAvailable < self.CHUNK_SIZE and bytesLeft > 0: - # Unthrottle a single write (from the protocol's buffer) - bytesLeft = p.unthrottleWrites() - - # If the protocol is not done, requeue it - if bytesLeft > 0: - self._writeQueue.append(p) - - self.checkWriteBandwidthID = self.callLater(1, self.checkWriteBandwidth) - - - def throttleReads(self): - """ - Throttle reads on all protocols. - """ - log.msg("Throttling reads on %s" % self) - for p in self.protocols.keys(): - p.throttleReads() - - - def unthrottleReads(self): - """ - Stop throttling reads on all protocols. 
-        """
-        self.unthrottleReadsID = None
-        log.msg("Stopped throttling reads on %s" % self)
-        for p in self.protocols.keys():
-            p.unthrottleReads()
-
-
-    def buildProtocol(self, addr):
-        if self.connectionCount == 0:
-            if self.readLimit is not None:
-                self.checkReadBandwidth()
-            if self.writeLimit is not None:
-                self.checkWriteBandwidth()
-
-        if self.connectionCount < self.maxConnectionCount:
-            self.connectionCount += 1
-            return WrappingFactory.buildProtocol(self, addr)
-        else:
-            log.msg("Max connection count reached!")
-            return None
-
-
-    def unregisterProtocol(self, p):
-        WrappingFactory.unregisterProtocol(self, p)
-        self.connectionCount -= 1
-        if self.connectionCount == 0:
-            if self.unthrottleReadsID is not None:
-                self.unthrottleReadsID.cancel()
-            if self.checkReadBandwidthID is not None:
-                self.checkReadBandwidthID.cancel()
-            if self.unthrottleWritesID is not None:
-                self.unthrottleWritesID.cancel()
-            if self.checkWriteBandwidthID is not None:
-                self.checkWriteBandwidthID.cancel()
-
-
-
-class SpewingProtocol(ProtocolWrapper):
-    def dataReceived(self, data):
-        log.msg("Received: %r" % data)
-        ProtocolWrapper.dataReceived(self,data)
-
-    def write(self, data):
-        log.msg("Sending: %r" % data)
-        ProtocolWrapper.write(self,data)
-
-
-
-class SpewingFactory(WrappingFactory):
-    protocol = SpewingProtocol
-
-
-
-class LimitConnectionsByPeer(WrappingFactory):
-    """Stability: Unstable"""
-
-    maxConnectionsPerPeer = 5
-
-    def startFactory(self):
-        self.peerConnections = {}
-
-    def buildProtocol(self, addr):
-        peerHost = addr[0]
-        connectionCount = self.peerConnections.get(peerHost, 0)
-        if connectionCount >= self.maxConnectionsPerPeer:
-            return None
-        self.peerConnections[peerHost] = connectionCount + 1
-        return WrappingFactory.buildProtocol(self, addr)
-
-    def unregisterProtocol(self, p):
-        peerHost = p.getPeer()[1]
-        self.peerConnections[peerHost] -= 1
-        if self.peerConnections[peerHost] == 0:
-            del self.peerConnections[peerHost]
-
-
-class LimitTotalConnectionsFactory(ServerFactory):
-    """Factory that limits the number of simultaneous connections.
-
-    API Stability: Unstable
-
-    @type connectionCount: C{int}
-    @ivar connectionCount: number of current connections.
-    @type connectionLimit: C{int} or C{None}
-    @cvar connectionLimit: maximum number of connections.
-    @type overflowProtocol: L{Protocol} or C{None}
-    @cvar overflowProtocol: Protocol to use for new connections when
-        connectionLimit is exceeded.  If C{None} (the default value), excess
-        connections will be closed immediately.
-    """
-    connectionCount = 0
-    connectionLimit = None
-    overflowProtocol = None
-
-    def buildProtocol(self, addr):
-        if (self.connectionLimit is None or
-            self.connectionCount < self.connectionLimit):
-                # Build the normal protocol
-                wrappedProtocol = self.protocol()
-        elif self.overflowProtocol is None:
-            # Just drop the connection
-            return None
-        else:
-            # Too many connections, so build the overflow protocol
-            wrappedProtocol = self.overflowProtocol()
-
-        wrappedProtocol.factory = self
-        protocol = ProtocolWrapper(self, wrappedProtocol)
-        self.connectionCount += 1
-        return protocol
-
-    def registerProtocol(self, p):
-        pass
-
-    def unregisterProtocol(self, p):
-        self.connectionCount -= 1
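All of these wrappers compose the same way: wrap an ordinary factory and hand the wrapper to the reactor, so limits apply without the wrapped protocol knowing about them. A sketch using the ThrottlingFactory defined above (the port number and limits are illustrative):

    from twisted.internet import reactor, protocol

    class Echo(protocol.Protocol):
        def dataReceived(self, data):
            self.transport.write(data)

    factory = protocol.ServerFactory()
    factory.protocol = Echo
    # At most 10 simultaneous connections, reading at most 20 kB/s in total:
    reactor.listenTCP(8000, ThrottlingFactory(factory, maxConnectionCount=10,
                                              readLimit=20*1024))
    reactor.run()
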
-class TimeoutProtocol(ProtocolWrapper):
-    """
-    Protocol that automatically disconnects when the connection is idle.
-
-    Stability: Unstable
-    """
-
-    def __init__(self, factory, wrappedProtocol, timeoutPeriod):
-        """
-        Constructor.
-
-        @param factory: An L{IFactory}.
-        @param wrappedProtocol: A L{Protocol} to wrap.
-        @param timeoutPeriod: Number of seconds to wait for activity before
-            timing out.
-        """
-        ProtocolWrapper.__init__(self, factory, wrappedProtocol)
-        self.timeoutCall = None
-        self.setTimeout(timeoutPeriod)
-
-
-    def setTimeout(self, timeoutPeriod=None):
-        """
-        Set a timeout.
-
-        This will cancel any existing timeouts.
-
-        @param timeoutPeriod: If not C{None}, change the timeout period.
-            Otherwise, use the existing value.
-        """
-        self.cancelTimeout()
-        if timeoutPeriod is not None:
-            self.timeoutPeriod = timeoutPeriod
-        self.timeoutCall = self.factory.callLater(self.timeoutPeriod, self.timeoutFunc)
-
-
-    def cancelTimeout(self):
-        """
-        Cancel the timeout.
-
-        If the timeout was already cancelled, this does nothing.
-        """
-        if self.timeoutCall:
-            try:
-                self.timeoutCall.cancel()
-            except error.AlreadyCalled:
-                pass
-            self.timeoutCall = None
-
-
-    def resetTimeout(self):
-        """
-        Reset the timeout, usually because some activity just happened.
-        """
-        if self.timeoutCall:
-            self.timeoutCall.reset(self.timeoutPeriod)
-
-
-    def write(self, data):
-        self.resetTimeout()
-        ProtocolWrapper.write(self, data)
-
-
-    def writeSequence(self, seq):
-        self.resetTimeout()
-        ProtocolWrapper.writeSequence(self, seq)
-
-
-    def dataReceived(self, data):
-        self.resetTimeout()
-        ProtocolWrapper.dataReceived(self, data)
-
-
-    def connectionLost(self, reason):
-        self.cancelTimeout()
-        ProtocolWrapper.connectionLost(self, reason)
-
-
-    def timeoutFunc(self):
-        """
-        This method is called when the timeout is triggered.
-
-        By default it calls L{loseConnection}.  Override this if you want
-        something else to happen.
-        """
-        self.loseConnection()
-
-
-
-class TimeoutFactory(WrappingFactory):
-    """
-    Factory for TimeoutWrapper.
-
-    Stability: Unstable
-    """
-    protocol = TimeoutProtocol
-
-
-    def __init__(self, wrappedFactory, timeoutPeriod=30*60):
-        self.timeoutPeriod = timeoutPeriod
-        WrappingFactory.__init__(self, wrappedFactory)
-
-
-    def buildProtocol(self, addr):
-        return self.protocol(self, self.wrappedFactory.buildProtocol(addr),
-                             timeoutPeriod=self.timeoutPeriod)
-
-
-    def callLater(self, period, func):
-        """
-        Wrapper around L{reactor.callLater} for test purpose.
-        """
-        return reactor.callLater(period, func)
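TimeoutFactory composes identically: wrapped connections are dropped after timeoutPeriod idle seconds, with every read and write resetting the timer. For instance, reusing the echo factory from the previous sketch:

    # Drop connections that have been idle for five minutes:
    reactor.listenTCP(8001, TimeoutFactory(factory, timeoutPeriod=5*60))
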
-class TrafficLoggingProtocol(ProtocolWrapper):
-
-    def __init__(self, factory, wrappedProtocol, logfile, lengthLimit=None,
-                 number=0):
-        """
-        @param factory: factory which created this protocol.
-        @type factory: C{protocol.Factory}.
-        @param wrappedProtocol: the underlying protocol.
-        @type wrappedProtocol: C{protocol.Protocol}.
-        @param logfile: file opened for writing used to write log messages.
-        @type logfile: C{file}
-        @param lengthLimit: maximum size of the data received that is logged.
-        @type lengthLimit: C{int}
-        @param number: identifier of the connection.
-        @type number: C{int}.
-        """
-        ProtocolWrapper.__init__(self, factory, wrappedProtocol)
-        self.logfile = logfile
-        self.lengthLimit = lengthLimit
-        self._number = number
-
-
-    def _log(self, line):
-        self.logfile.write(line + '\n')
-        self.logfile.flush()
-
-
-    def _mungeData(self, data):
-        if self.lengthLimit and len(data) > self.lengthLimit:
-            data = data[:self.lengthLimit - 12] + '<... elided>'
-        return data
-
-
-    # IProtocol
-    def connectionMade(self):
-        self._log('*')
-        return ProtocolWrapper.connectionMade(self)
-
-
-    def dataReceived(self, data):
-        self._log('C %d: %r' % (self._number, self._mungeData(data)))
-        return ProtocolWrapper.dataReceived(self, data)
-
-
-    def connectionLost(self, reason):
-        self._log('C %d: %r' % (self._number, reason))
-        return ProtocolWrapper.connectionLost(self, reason)
-
-
-    # ITransport
-    def write(self, data):
-        self._log('S %d: %r' % (self._number, self._mungeData(data)))
-        return ProtocolWrapper.write(self, data)
-
-
-    def writeSequence(self, iovec):
-        self._log('SV %d: %r' % (self._number, [self._mungeData(d) for d in iovec]))
-        return ProtocolWrapper.writeSequence(self, iovec)
-
-
-    def loseConnection(self):
-        self._log('S %d: *' % (self._number,))
-        return ProtocolWrapper.loseConnection(self)
-
-
-
-class TrafficLoggingFactory(WrappingFactory):
-    protocol = TrafficLoggingProtocol
-
-    _counter = 0
-
-    def __init__(self, wrappedFactory, logfilePrefix, lengthLimit=None):
-        self.logfilePrefix = logfilePrefix
-        self.lengthLimit = lengthLimit
-        WrappingFactory.__init__(self, wrappedFactory)
-
-
-    def open(self, name):
-        return file(name, 'w')
-
-
-    def buildProtocol(self, addr):
-        self._counter += 1
-        logfile = self.open(self.logfilePrefix + '-' + str(self._counter))
-        return self.protocol(self, self.wrappedFactory.buildProtocol(addr),
-                             logfile, self.lengthLimit, self._counter)
-
-
-    def resetCounter(self):
-        """
-        Reset the value of the counter used to identify connections.
-        """
-        self._counter = 0
-
-
-
-class TimeoutMixin:
-    """Mixin for protocols which wish to timeout connections
-
-    @cvar timeOut: The number of seconds after which to timeout the connection.
-    """
-    timeOut = None
-
-    __timeoutCall = None
-
-    def callLater(self, period, func):
-        return reactor.callLater(period, func)
-
-
-    def resetTimeout(self):
-        """Reset the timeout count down"""
-        if self.__timeoutCall is not None and self.timeOut is not None:
-            self.__timeoutCall.reset(self.timeOut)
-
-    def setTimeout(self, period):
-        """Change the timeout period
-
-        @type period: C{int} or C{NoneType}
-        @param period: The period, in seconds, to change the timeout to, or
-            C{None} to disable the timeout.
-        """
-        prev = self.timeOut
-        self.timeOut = period
-
-        if self.__timeoutCall is not None:
-            if period is None:
-                self.__timeoutCall.cancel()
-                self.__timeoutCall = None
-            else:
-                self.__timeoutCall.reset(period)
-        elif period is not None:
-            self.__timeoutCall = self.callLater(period, self.__timedOut)
-
-        return prev
-
-    def __timedOut(self):
-        self.__timeoutCall = None
-        self.timeoutConnection()
-
-    def timeoutConnection(self):
-        """Called when the connection times out.
-        Override to define behavior other than dropping the connection.
-        """
-        self.transport.loseConnection()
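TimeoutMixin provides the same idle-timeout behaviour from inside a protocol class rather than via a wrapping factory, which is the form to use when the protocol itself knows its timeout policy. A minimal sketch (the default timeoutConnection() already drops the connection; it is overridden here only to show the hook):

    from twisted.internet import protocol

    class IdleEcho(TimeoutMixin, protocol.Protocol):
        # Uses the TimeoutMixin defined above in this module.
        def connectionMade(self):
            self.setTimeout(60)            # time out after 60 idle seconds
        def dataReceived(self, data):
            self.resetTimeout()            # any traffic resets the clock
            self.transport.write(data)
        def timeoutConnection(self):
            self.transport.loseConnection()
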
diff --git a/apt_dht/util.py b/apt_dht/util.py
deleted file mode 100644
index c334d1d..0000000
--- a/apt_dht/util.py
+++ /dev/null
@@ -1,167 +0,0 @@
-
-"""Some utility functions for use in the apt-p2p program.
-
-@var isLocal: a compiled regular expression suitable for testing if an
-    IP address is from a known local or private range
-"""
-
-import os, re
-
-from twisted.python import log
-from twisted.trial import unittest
-
-isLocal = re.compile('^(192\.168\.[0-9]{1,3}\.[0-9]{1,3}|'+
-                     '10\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}|'+
-                     '172\.(1[6-9]|2[0-9]|3[01])\.[0-9]{1,3}\.[0-9]{1,3}|'+
-                     '127\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})$')
-
-def findMyIPAddr(addrs, intended_port, local_ok = False):
-    """Find the best IP address to use from a list of possibilities.
-
-    @param addrs: the list of possible IP addresses
-    @param intended_port: the port that was supposed to be used
-    @param local_ok: whether known local/private IP ranges are allowed
-        (defaults to False)
-    @return: the preferred IP address, or None if one couldn't be found
-    """
-    log.msg("got addrs: %r" % (addrs,))
-    my_addr = None
-
-    # Try to find an address using the ifconfig function
-    try:
-        ifconfig = os.popen("/sbin/ifconfig |/bin/grep inet|"+
-                            "/usr/bin/awk '{print $2}' | "+
-                            "sed -e s/.*://", "r").read().strip().split('\n')
-    except:
-        ifconfig = []
-
-    # Get counts for all the non-local addresses returned from ifconfig
-    addr_count = {}
-    for addr in ifconfig:
-        if local_ok or not isLocal.match(addr):
-            addr_count.setdefault(addr, 0)
-            addr_count[addr] += 1
-
-    # If only one was found, use it as a starting point
-    local_addrs = addr_count.keys()
-    if len(local_addrs) == 1:
-        my_addr = local_addrs[0]
-        log.msg('Found remote address from ifconfig: %r' % (my_addr,))
-
-    # Get counts for all the non-local addresses returned from the DHT
-    addr_count = {}
-    port_count = {}
-    for addr in addrs:
-        if local_ok or not isLocal.match(addr[0]):
-            addr_count.setdefault(addr[0], 0)
-            addr_count[addr[0]] += 1
-            port_count.setdefault(addr[1], 0)
-            port_count[addr[1]] += 1
-
-    # Find the most popular address
-    popular_addr = []
-    popular_count = 0
-    for addr in addr_count:
-        if addr_count[addr] > popular_count:
-            popular_addr = [addr]
-            popular_count = addr_count[addr]
-        elif addr_count[addr] == popular_count:
-            popular_addr.append(addr)
-
-    # Find the most popular port
-    popular_port = []
-    popular_count = 0
-    for port in port_count:
-        if port_count[port] > popular_count:
-            popular_port = [port]
-            popular_count = port_count[port]
-        elif port_count[port] == popular_count:
-            popular_port.append(port)
-
-    # Check to make sure the port isn't being changed
-    port = intended_port
-    if len(port_count.keys()) > 1:
-        log.msg('Problem, multiple ports have been found: %r' % (port_count,))
-        if port not in port_count.keys():
-            log.msg('And none of the ports found match the intended one')
-    elif len(port_count.keys()) == 1:
-        port = port_count.keys()[0]
-    else:
-        log.msg('Port was not found')
-
-    # If one is popular, use that address
-    if len(popular_addr) == 1:
-        log.msg('Found popular address: %r' % (popular_addr[0],))
-        if my_addr and my_addr != popular_addr[0]:
-            log.msg('But the popular address does not match: %s != %s' % (popular_addr[0], my_addr))
-        my_addr = popular_addr[0]
-    elif len(popular_addr) > 1:
-        log.msg('Found multiple popular addresses: %r' % (popular_addr,))
-        if my_addr and my_addr not in popular_addr:
-            log.msg('And none of the addresses found match the ifconfig one')
-    else:
-        log.msg('No non-local addresses found: %r' % (popular_addr,))
-
-    if not my_addr:
-        log.msg("Remote IP Address could not be found for this machine")
-
-    return my_addr
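The corrected isLocal pattern above matches exactly the RFC 1918 private ranges plus loopback: 192.168/16, 10/8, 172.16/12 and 127/8 (the original grouping let the bare 2[0-9] and 3[0-1] alternatives match unrelated addresses such as 29.x.x.x). A quick check of the intended behaviour:

    import re

    isLocal = re.compile('^(192\.168\.[0-9]{1,3}\.[0-9]{1,3}|'
                         '10\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}|'
                         '172\.(1[6-9]|2[0-9]|3[01])\.[0-9]{1,3}\.[0-9]{1,3}|'
                         '127\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})$')

    for addr in ['10.0.0.1', '172.16.4.5', '192.168.1.1', '127.0.0.1']:
        assert isLocal.match(addr)
    for addr in ['172.15.0.1', '172.32.0.1', '8.8.8.8', '29.1.1.1']:
        assert isLocal.match(addr) is None
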
-def ipAddrFromChicken():
-    """Retrieve a possible IP address from the ipchicken website."""
-    import urllib
-    ip_search = re.compile('\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
-    try:
-        f = urllib.urlopen("http://www.ipchicken.com")
-        data = f.read()
-        f.close()
-        current_ip = ip_search.findall(data)
-        return current_ip
-    except Exception:
-        return []
-
-def uncompact(s):
-    """Extract the contact info from a compact peer representation.
-
-    @type s: C{string}
-    @param s: the compact representation
-    @rtype: (C{string}, C{int})
-    @return: the IP address and port number to contact the peer on
-    @raise ValueError: if the string is not a valid compact representation
-    """
-    if (len(s) != 6):
-        raise ValueError
-    ip = '.'.join([str(ord(i)) for i in s[0:4]])
-    port = (ord(s[4]) << 8) | ord(s[5])
-    return (ip, port)
-
-def compact(ip, port):
-    """Create a compact representation of peer contact info.
-
-    @type ip: C{string}
-    @param ip: the IP address of the peer
-    @type port: C{int}
-    @param port: the port number to contact the peer on
-    @rtype: C{string}
-    @return: the compact representation
-    @raise ValueError: if no compact representation exists for the input
-    """
-    s = ''.join([chr(int(i)) for i in ip.split('.')]) + \
-        chr((port & 0xFF00) >> 8) + chr(port & 0xFF)
-    if len(s) != 6:
-        raise ValueError
-    return s
-
-class TestUtil(unittest.TestCase):
-    """Tests for the utilities."""
-
-    timeout = 5
-    ip = '165.234.1.34'
-    port = 61234
-
-    def test_compact(self):
-        """Make sure compacting is reversed correctly by uncompacting."""
-        d = uncompact(compact(self.ip, self.port))
-        self.failUnlessEqual(d[0], self.ip)
-        self.failUnlessEqual(d[1], self.port)
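compact() and uncompact() above implement the standard 6-byte compact peer format of Kademlia/BitTorrent-style DHTs: four raw IPv4 address bytes followed by a big-endian 16-bit port. A worked example (9977 is 0x26F9):

    s = compact('10.0.0.1', 9977)
    assert s == '\x0a\x00\x00\x01\x26\xf9'
    assert uncompact(s) == ('10.0.0.1', 9977)
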
diff --git a/apt_dht_Khashmir/DHT.py b/apt_dht_Khashmir/DHT.py
deleted file mode 100644
index 399babf..0000000
--- a/apt_dht_Khashmir/DHT.py
+++ /dev/null
@@ -1,454 +0,0 @@
-
-"""The main interface to the Khashmir DHT.
-
-@var khashmir_dir: the name of the directory to use for DHT files
-"""
-
-from datetime import datetime
-import os, sha, random
-
-from twisted.internet import defer, reactor
-from twisted.internet.abstract import isIPAddress
-from twisted.python import log
-from twisted.trial import unittest
-from zope.interface import implements
-
-from apt_p2p.interfaces import IDHT
-from khashmir import Khashmir
-from bencode import bencode, bdecode
-
-khashmir_dir = 'apt-p2p-Khashmir'
-
-class DHTError(Exception):
-    """Represents errors that occur in the DHT."""
-
-class DHT:
-    """The main interface instance to the Khashmir DHT.
-
-    @type config: C{dictionary}
-    @ivar config: the DHT configuration values
-    @type cache_dir: C{string}
-    @ivar cache_dir: the directory to use for storing files
-    @type bootstrap: C{list} of C{string}
-    @ivar bootstrap: the nodes to contact to bootstrap into the system
-    @type bootstrap_node: C{boolean}
-    @ivar bootstrap_node: whether this node is a bootstrap node
-    @type joining: L{twisted.internet.defer.Deferred}
-    @ivar joining: if a join is underway, the deferred that will signal its end
-    @type joined: C{boolean}
-    @ivar joined: whether the DHT network has been successfully joined
-    @type outstandingJoins: C{int}
-    @ivar outstandingJoins: the number of bootstrap nodes that have yet to respond
-    @type foundAddrs: C{list} of (C{string}, C{int})
-    @ivar foundAddrs: the IP address and port that were returned by bootstrap nodes
-    @type storing: C{dictionary}
-    @ivar storing: keys are keys for which store requests are active, values
-        are dictionaries with keys the values being stored and values the
-        deferred to call when complete
-    @type retrieving: C{dictionary}
-    @ivar retrieving: keys are the keys for which getValue requests are active,
-        values are lists of the deferreds waiting for the requests
-    @type retrieved: C{dictionary}
-    @ivar retrieved: keys are the keys for which getValue requests are active,
-        values are list of the values returned so far
-    @type config_parser: L{apt_p2p.apt_p2p_conf.AptP2PConfigParser}
-    @ivar config_parser: the configuration info for the main program
-    @type section: C{string}
-    @ivar section: the section of the configuration info that applies to the DHT
-    @type khashmir: L{khashmir.Khashmir}
-    @ivar khashmir: the khashmir DHT instance to use
-    """
-
-    implements(IDHT)
-
-    def __init__(self):
-        """Initialize the DHT."""
-        self.config = None
-        self.cache_dir = ''
-        self.bootstrap = []
-        self.bootstrap_node = False
-        self.joining = None
-        self.joined = False
-        self.outstandingJoins = 0
-        self.foundAddrs = []
-        self.storing = {}
-        self.retrieving = {}
-        self.retrieved = {}
-
-    def loadConfig(self, config, section):
-        """See L{apt_p2p.interfaces.IDHT}."""
-        self.config_parser = config
-        self.section = section
-        self.config = {}
-
-        # Get some initial values
-        self.cache_dir = os.path.join(self.config_parser.get(section, 'cache_dir'), khashmir_dir)
-        if not os.path.exists(self.cache_dir):
-            os.makedirs(self.cache_dir)
-        self.bootstrap = self.config_parser.getstringlist(section, 'BOOTSTRAP')
-        self.bootstrap_node = self.config_parser.getboolean(section, 'BOOTSTRAP_NODE')
-        for k in self.config_parser.options(section):
-            # The numbers in the config file
-            if k in ['K', 'HASH_LENGTH', 'CONCURRENT_REQS', 'STORE_REDUNDANCY',
-                     'RETRIEVE_VALUES', 'MAX_FAILURES', 'PORT']:
-                self.config[k] = self.config_parser.getint(section, k)
-            # The times in the config file
-            elif k in ['CHECKPOINT_INTERVAL', 'MIN_PING_INTERVAL',
-                       'BUCKET_STALENESS', 'KEY_EXPIRE']:
-                self.config[k] = self.config_parser.gettime(section, k)
-            # The booleans in the config file
-            elif k in ['SPEW']:
-                self.config[k] = self.config_parser.getboolean(section, k)
-            # Everything else is a string
-            else:
-                self.config[k] = self.config_parser.get(section, k)
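Putting loadConfig() together with the join/leave and getValue/storeValue methods that follow, a DHT instance's expected lifecycle looks like the sketch below; the config object is assumed to be an AptP2PConfigParser populated as in apt_dht_conf.py.

    from twisted.python import log

    dht = DHT()
    dht.loadConfig(config, config.get('DEFAULT', 'DHT'))

    def joined(my_addrs):
        log.msg('joined the DHT, others see us as: %r' % (my_addrs,))
        d = dht.getValue('some key, padded/truncated by _normKey')
        d.addCallback(lambda values: log.msg('retrieved: %r' % (values,)))
        return d

    d = dht.join()
    d.addCallback(joined)
    d.addErrback(log.err)
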
-    def join(self):
-        """See L{apt_p2p.interfaces.IDHT}."""
-        if self.config is None:
-            raise DHTError, "configuration not loaded"
-        if self.joining:
-            raise DHTError, "a join is already in progress"
-
-        # Create the new khashmir instance
-        self.khashmir = Khashmir(self.config, self.cache_dir)
-
-        self.joining = defer.Deferred()
-        for node in self.bootstrap:
-            host, port = node.rsplit(':', 1)
-            port = int(port)
-
-            # Translate host names into IP addresses
-            if isIPAddress(host):
-                self._join_gotIP(host, port)
-            else:
-                reactor.resolve(host).addCallback(self._join_gotIP, port)
-
-        return self.joining
-
-    def _join_gotIP(self, ip, port):
-        """Join the DHT using a single bootstrap node's IP address."""
-        self.outstandingJoins += 1
-        self.khashmir.addContact(ip, port, self._join_single, self._join_error)
-
-    def _join_single(self, addr):
-        """Process the response from the bootstrap node.
-
-        Finish the join by contacting close nodes.
-        """
-        self.outstandingJoins -= 1
-        if addr:
-            self.foundAddrs.append(addr)
-        if addr or self.outstandingJoins <= 0:
-            self.khashmir.findCloseNodes(self._join_complete, self._join_complete)
-        log.msg('Got back from bootstrap node: %r' % (addr,))
-
-    def _join_error(self, failure = None):
-        """Process an error in contacting the bootstrap node.
-
-        If no bootstrap nodes remain, finish the process by contacting
-        close nodes.
-        """
-        self.outstandingJoins -= 1
-        log.msg("bootstrap node could not be reached")
-        if self.outstandingJoins <= 0:
-            self.khashmir.findCloseNodes(self._join_complete, self._join_complete)
-
-    def _join_complete(self, result):
-        """End the joining process and return the addresses found for this node."""
-        if not self.joined and len(result) > 0:
-            self.joined = True
-        if self.joining and self.outstandingJoins <= 0:
-            df = self.joining
-            self.joining = None
-            if self.joined or self.bootstrap_node:
-                self.joined = True
-                df.callback(self.foundAddrs)
-            else:
-                df.errback(DHTError('could not find any nodes to bootstrap to'))
-
-    def getAddrs(self):
-        """Get the list of addresses returned by bootstrap nodes for this node."""
-        return self.foundAddrs
-
-    def leave(self):
-        """See L{apt_p2p.interfaces.IDHT}."""
-        if self.config is None:
-            raise DHTError, "configuration not loaded"
-
-        if self.joined or self.joining:
-            if self.joining:
-                self.joining.errback(DHTError('still joining when leave was called'))
-                self.joining = None
-            self.joined = False
-            self.khashmir.shutdown()
-
-    def _normKey(self, key, bits=None, bytes=None):
-        """Normalize the length of keys used in the DHT."""
-        if bits is None and bytes is None:
-            bits = self.config["HASH_LENGTH"]
-        if bits is not None:
-            bytes = (bits - 1) // 8 + 1
-        elif bytes is None:
-            raise DHTError, "you must specify one of bits or bytes for normalization"
-
-        # Extend short keys with null bytes
-        if len(key) < bytes:
-            key = key + '\000'*(bytes - len(key))
-        # Truncate long keys
-        elif len(key) > bytes:
-            key = key[:bytes]
-        return key
-
-    def getValue(self, key):
-        """See L{apt_p2p.interfaces.IDHT}."""
-        if self.config is None:
-            raise DHTError, "configuration not loaded"
-        if not self.joined:
-            raise DHTError, "have not joined a network yet"
-
-        key = self._normKey(key)
-
-        d = defer.Deferred()
-        if key not in self.retrieving:
-            self.khashmir.valueForKey(key, self._getValue)
-        self.retrieving.setdefault(key, []).append(d)
-        return d
-
-    def _getValue(self, key, result):
-        """Process a returned list of values from the DHT."""
-        # Save the list of values to return when it is complete
-        if result:
-            self.retrieved.setdefault(key, []).extend([bdecode(r) for r in result])
-        else:
-            # Empty list, the get is complete, return the result
-            final_result = []
-            if key in self.retrieved:
-                final_result = self.retrieved[key]
-                del self.retrieved[key]
-            for i in range(len(self.retrieving[key])):
-                d = self.retrieving[key].pop(0)
-                d.callback(final_result)
-            del self.retrieving[key]
-
-    def 
storeValue(self, key, value): - """See L{apt_p2p.interfaces.IDHT}.""" - if self.config is None: - raise DHTError, "configuration not loaded" - if not self.joined: - raise DHTError, "have not joined a network yet" - - key = self._normKey(key) - bvalue = bencode(value) - - if key in self.storing and bvalue in self.storing[key]: - raise DHTError, "already storing that key with the same value" - - d = defer.Deferred() - self.khashmir.storeValueForKey(key, bvalue, self._storeValue) - self.storing.setdefault(key, {})[bvalue] = d - return d - - def _storeValue(self, key, bvalue, result): - """Process the response from the DHT.""" - if key in self.storing and bvalue in self.storing[key]: - # Check if the store succeeded - if len(result) > 0: - self.storing[key][bvalue].callback(result) - else: - self.storing[key][bvalue].errback(DHTError('could not store value %s in key %s' % (bvalue, key))) - del self.storing[key][bvalue] - if len(self.storing[key].keys()) == 0: - del self.storing[key] - -class TestSimpleDHT(unittest.TestCase): - """Simple 2-node unit tests for the DHT.""" - - timeout = 2 - DHT_DEFAULTS = {'PORT': 9977, 'K': 8, 'HASH_LENGTH': 160, - 'CHECKPOINT_INTERVAL': 300, 'CONCURRENT_REQS': 4, - 'STORE_REDUNDANCY': 3, 'RETRIEVE_VALUES': -10000, - 'MAX_FAILURES': 3, - 'MIN_PING_INTERVAL': 900,'BUCKET_STALENESS': 3600, - 'KEY_EXPIRE': 3600, 'SPEW': False, } - - def setUp(self): - self.a = DHT() - self.b = DHT() - self.a.config = self.DHT_DEFAULTS.copy() - self.a.config['PORT'] = 4044 - self.a.bootstrap = ["127.0.0.1:4044"] - self.a.bootstrap_node = True - self.a.cache_dir = '/tmp' - self.b.config = self.DHT_DEFAULTS.copy() - self.b.config['PORT'] = 4045 - self.b.bootstrap = ["127.0.0.1:4044"] - self.b.cache_dir = '/tmp' - - def test_bootstrap_join(self): - d = self.a.join() - return d - - def node_join(self, result): - d = self.b.join() - return d - - def test_join(self): - self.lastDefer = defer.Deferred() - d = self.a.join() - d.addCallback(self.node_join) - d.addCallback(self.lastDefer.callback) - return self.lastDefer - - def test_normKey(self): - h = self.a._normKey('12345678901234567890') - self.failUnless(h == '12345678901234567890') - h = self.a._normKey('12345678901234567') - self.failUnless(h == '12345678901234567\000\000\000') - h = self.a._normKey('1234567890123456789012345') - self.failUnless(h == '12345678901234567890') - h = self.a._normKey('1234567890123456789') - self.failUnless(h == '1234567890123456789\000') - h = self.a._normKey('123456789012345678901') - self.failUnless(h == '12345678901234567890') - - def value_stored(self, result, value): - self.stored -= 1 - if self.stored == 0: - self.get_values() - - def store_values(self, result): - self.stored = 3 - d = self.a.storeValue(sha.new('4045').digest(), str(4045*3)) - d.addCallback(self.value_stored, 4045) - d = self.a.storeValue(sha.new('4044').digest(), str(4044*2)) - d.addCallback(self.value_stored, 4044) - d = self.b.storeValue(sha.new('4045').digest(), str(4045*2)) - d.addCallback(self.value_stored, 4045) - - def check_values(self, result, values): - self.checked -= 1 - self.failUnless(len(result) == len(values)) - for v in result: - self.failUnless(v in values) - if self.checked == 0: - self.lastDefer.callback(1) - - def get_values(self): - self.checked = 4 - d = self.a.getValue(sha.new('4044').digest()) - d.addCallback(self.check_values, [str(4044*2)]) - d = self.b.getValue(sha.new('4044').digest()) - d.addCallback(self.check_values, [str(4044*2)]) - d = self.a.getValue(sha.new('4045').digest()) - 
d.addCallback(self.check_values, [str(4045*2), str(4045*3)]) - d = self.b.getValue(sha.new('4045').digest()) - d.addCallback(self.check_values, [str(4045*2), str(4045*3)]) - - def test_store(self): - from twisted.internet.base import DelayedCall - DelayedCall.debug = True - self.lastDefer = defer.Deferred() - d = self.a.join() - d.addCallback(self.node_join) - d.addCallback(self.store_values) - return self.lastDefer - - def tearDown(self): - self.a.leave() - try: - os.unlink(self.a.khashmir.store.db) - except: - pass - self.b.leave() - try: - os.unlink(self.b.khashmir.store.db) - except: - pass - -class TestMultiDHT(unittest.TestCase): - """More complicated 20-node tests for the DHT.""" - - timeout = 60 - num = 20 - DHT_DEFAULTS = {'PORT': 9977, 'K': 8, 'HASH_LENGTH': 160, - 'CHECKPOINT_INTERVAL': 300, 'CONCURRENT_REQS': 4, - 'STORE_REDUNDANCY': 3, 'RETRIEVE_VALUES': -10000, - 'MAX_FAILURES': 3, - 'MIN_PING_INTERVAL': 900,'BUCKET_STALENESS': 3600, - 'KEY_EXPIRE': 3600, 'SPEW': False, } - - def setUp(self): - self.l = [] - self.startport = 4081 - for i in range(self.num): - self.l.append(DHT()) - self.l[i].config = self.DHT_DEFAULTS.copy() - self.l[i].config['PORT'] = self.startport + i - self.l[i].bootstrap = ["127.0.0.1:" + str(self.startport)] - self.l[i].cache_dir = '/tmp' - self.l[0].bootstrap_node = True - - def node_join(self, result, next_node): - d = self.l[next_node].join() - if next_node + 1 < len(self.l): - d.addCallback(self.node_join, next_node + 1) - else: - d.addCallback(self.lastDefer.callback) - - def test_join(self): - self.timeout = 2 - self.lastDefer = defer.Deferred() - d = self.l[0].join() - d.addCallback(self.node_join, 1) - return self.lastDefer - - def store_values(self, result, i = 0, j = 0): - if j > i: - j -= i+1 - i += 1 - if i == len(self.l): - self.get_values() - else: - d = self.l[j].storeValue(sha.new(str(self.startport+i)).digest(), str((self.startport+i)*(j+1))) - d.addCallback(self.store_values, i, j+1) - - def get_values(self, result = None, check = None, i = 0, j = 0): - if result is not None: - self.failUnless(len(result) == len(check)) - for v in result: - self.failUnless(v in check) - if j >= len(self.l): - j -= len(self.l) - i += 1 - if i == len(self.l): - self.lastDefer.callback(1) - else: - d = self.l[i].getValue(sha.new(str(self.startport+j)).digest()) - check = [] - for k in range(self.startport+j, (self.startport+j)*(j+1)+1, self.startport+j): - check.append(str(k)) - d.addCallback(self.get_values, check, i, j + random.randrange(1, min(len(self.l), 10))) - - def store_join(self, result, next_node): - d = self.l[next_node].join() - if next_node + 1 < len(self.l): - d.addCallback(self.store_join, next_node + 1) - else: - d.addCallback(self.store_values) - - def test_store(self): - from twisted.internet.base import DelayedCall - DelayedCall.debug = True - self.lastDefer = defer.Deferred() - d = self.l[0].join() - d.addCallback(self.store_join, 1) - return self.lastDefer - - def tearDown(self): - for i in self.l: - try: - i.leave() - os.unlink(i.khashmir.store.db) - except: - pass diff --git a/apt_dht_Khashmir/__init__.py b/apt_dht_Khashmir/__init__.py deleted file mode 100644 index 594e80a..0000000 --- a/apt_dht_Khashmir/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ - -"""The apt-p2p implementation of the Khashmir DHT. 
-
-These modules implement a modified Khashmir, which is a Kademlia-like
-Distributed Hash Table available at::
-
- http://khashmir.sourceforge.net/
-
-The protocol for the implementation's communication is described here::
-
- http://www.camrdale.org/apt-p2p/protocol.html
-
-To run the DHT you probably want to do something like::
-
- from apt_p2p_Khashmir import DHT
- myDHT = DHT.DHT()
- myDHT.loadConfig(config, section)
- myDHT.join()
-
-at which point you should be up and running and connected to others in the DHT.
-
-"""
diff --git a/apt_dht_Khashmir/actions.py b/apt_dht_Khashmir/actions.py
deleted file mode 100644
index 1179713..0000000
--- a/apt_dht_Khashmir/actions.py
+++ /dev/null
@@ -1,347 +0,0 @@
-## Copyright 2002-2004 Andrew Loewenstern, All Rights Reserved
-# see LICENSE.txt for license information
-
-"""Details of how to perform actions on remote peers."""
-
-from twisted.internet import reactor
-from twisted.python import log
-
-from khash import intify
-from util import uncompact
-
-class ActionBase:
- """Base class for some long running asynchronous processes like finding nodes or values.
-
- @type caller: L{khashmir.Khashmir}
- @ivar caller: the DHT instance that is performing the action
- @type target: C{string}
- @ivar target: the target of the action, usually a DHT key
- @type config: C{dictionary}
- @ivar config: the configuration variables for the DHT
- @type action: C{string}
- @ivar action: the name of the action to call on remote nodes
- @type num: C{long}
- @ivar num: the target key in integer form
- @type queried: C{dictionary}
- @ivar queried: the nodes that have been queried for this action,
- keys are node IDs, values are the node itself
- @type answered: C{dictionary}
- @ivar answered: the nodes that have answered the queries
- @type found: C{dictionary}
- @ivar found: nodes that have been found so far by the action
- @type sorted_nodes: C{list} of L{node.Node}
- @ivar sorted_nodes: a sorted list of nodes by their proximity to the key
- @type results: C{dictionary}
- @ivar results: keys are the results found so far by the action
- @type desired_results: C{int}
- @ivar desired_results: the minimum number of results that are needed
- before the action should stop
- @type callback: C{method}
- @ivar callback: the method to call with the results
- @type outstanding: C{int}
- @ivar outstanding: the number of requests currently outstanding
- @type outstanding_results: C{int}
- @ivar outstanding_results: the number of results that are expected from
- the requests that are currently outstanding
- @type finished: C{boolean}
- @ivar finished: whether the action is done
- @type sort: C{method}
- @ivar sort: used to sort nodes by their proximity to the target
- """
-
- def __init__(self, caller, target, callback, config, action, num_results = None):
- """Initialize the action.
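-
- A hedged sketch of how a caller typically drives one of these actions
- (this is the pattern used by L{khashmir.Khashmir.findNode} later in this
- changeset; C{nodes} is a locally known node list)::
-
- state = FindNode(self, id, d.callback, self.config)
- reactor.callLater(0, state.goWithNodes, nodes)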
- - @type caller: L{khashmir.Khashmir} - @param caller: the DHT instance that is performing the action - @type target: C{string} - @param target: the target of the action, usually a DHT key - @type callback: C{method} - @param callback: the method to call with the results - @type config: C{dictionary} - @param config: the configuration variables for the DHT - @type action: C{string} - @param action: the name of the action to call on remote nodes - @type num_results: C{int} - @param num_results: the minimum number of results that are needed before - the action should stop (optional, defaults to getting all the results) - - """ - - self.caller = caller - self.target = target - self.config = config - self.action = action - self.num = intify(target) - self.queried = {} - self.answered = {} - self.found = {} - self.sorted_nodes = [] - self.results = {} - self.desired_results = num_results - self.callback = callback - self.outstanding = 0 - self.outstanding_results = 0 - self.finished = False - - def sort(a, b, num=self.num): - """Sort nodes relative to the ID we are looking for.""" - x, y = num ^ a.num, num ^ b.num - if x > y: - return 1 - elif x < y: - return -1 - return 0 - self.sort = sort - - #{ Main operation - def goWithNodes(self, nodes): - """Start the action's process with a list of nodes to contact.""" - for node in nodes: - if node.id == self.caller.node.id: - continue - else: - self.found[node.id] = node - self.sortNodes() - self.schedule() - - def schedule(self): - """Schedule requests to be sent to remote nodes.""" - # Check if we are already done - if self.desired_results and ((len(self.results) >= abs(self.desired_results)) or - (self.desired_results < 0 and - len(self.answered) >= self.config['STORE_REDUNDANCY'])): - self.finished = True - result = self.generateResult() - reactor.callLater(0, self.callback, *result) - - if self.finished or (self.desired_results and - len(self.results) + self.outstanding_results >= abs(self.desired_results)): - return - - # Loop for each node that should be processed - for node in self.getNodesToProcess(): - # Don't send requests twice or to ourself - if node.id not in self.queried and node.id != self.caller.node.id: - self.queried[node.id] = 1 - - # Get the action to call on the node - try: - f = getattr(node, self.action) - except AttributeError: - log.msg("%s doesn't have a %s method!" 
% (node, self.action)) - else: - # Get the arguments to the action's method - try: - args, expected_results = self.generateArgs(node) - except ValueError: - pass - else: - # Call the action on the remote node - self.outstanding += 1 - self.outstanding_results += expected_results - df = f(self.caller.node.id, *args) - df.addCallbacks(self.gotResponse, self.actionFailed, - callbackArgs = (node, expected_results), - errbackArgs = (node, expected_results)) - - # We might have to stop for now - if (self.outstanding >= self.config['CONCURRENT_REQS'] or - (self.desired_results and - len(self.results) + self.outstanding_results >= abs(self.desired_results))): - break - - assert self.outstanding >= 0 - assert self.outstanding_results >= 0 - - # If no requests are outstanding, then we are done - if self.outstanding == 0: - self.finished = True - result = self.generateResult() - reactor.callLater(0, self.callback, *result) - - def gotResponse(self, dict, node, expected_results): - """Receive a response from a remote node.""" - self.caller.insertNode(node) - if self.finished or self.answered.has_key(node.id): - # a day late and a dollar short - return - self.outstanding -= 1 - self.outstanding_results -= expected_results - self.answered[node.id] = 1 - self.processResponse(dict['rsp']) - self.schedule() - - def actionFailed(self, err, node, expected_results): - """Receive an error from a remote node.""" - log.msg("action %s failed (%s) %s/%s" % (self.action, self.config['PORT'], node.host, node.port)) - log.err(err) - self.caller.table.nodeFailed(node) - self.outstanding -= 1 - self.outstanding_results -= expected_results - self.schedule() - - def handleGotNodes(self, nodes): - """Process any received node contact info in the response. - - Not called by default, but suitable for being called by - L{processResponse} in a recursive node search. - """ - for compact_node in nodes: - node_contact = uncompact(compact_node) - node = self.caller.Node(node_contact) - if not self.found.has_key(node.id): - self.found[node.id] = node - - def sortNodes(self): - """Sort the nodes, if necessary. - - Assumes nodes are never removed from the L{found} dictionary. - """ - if len(self.sorted_nodes) != len(self.found): - self.sorted_nodes = self.found.values() - self.sorted_nodes.sort(self.sort) - - #{ Subclass for specific actions - def getNodesToProcess(self): - """Generate a list of nodes to process next. - - This implementation is suitable for a recurring search over all nodes. - """ - self.sortNodes() - return self.sorted_nodes[:self.config['K']] - - def generateArgs(self, node): - """Generate the arguments to the node's action. - - These arguments will be appended to our node ID when calling the action. - Also return the number of results expected from this request. 
-
- @raise ValueError: if the node should not be queried
- """
- return (self.target, ), 0
-
- def processResponse(self, dict):
- """Process the response dictionary received from the remote node."""
- self.handleGotNodes(dict['nodes'])
-
- def generateResult(self):
- """Create the final result to return to the L{callback} function."""
- return []
-
-
-class FindNode(ActionBase):
- """Find the closest nodes to the key."""
-
- def __init__(self, caller, target, callback, config, action="findNode"):
- ActionBase.__init__(self, caller, target, callback, config, action)
-
- def processResponse(self, dict):
- """Save the token received from each node."""
- if dict["id"] in self.found:
- self.found[dict["id"]].updateToken(dict.get('token', ''))
- self.handleGotNodes(dict['nodes'])
-
- def generateResult(self):
- """Result is the K closest nodes to the target."""
- self.sortNodes()
- return (self.sorted_nodes[:self.config['K']], )
-
-
-class FindValue(ActionBase):
- """Find the closest nodes to the key and check for values."""
-
- def __init__(self, caller, target, callback, config, action="findValue"):
- ActionBase.__init__(self, caller, target, callback, config, action)
-
- def processResponse(self, dict):
- """Save the number of values each node has."""
- if dict["id"] in self.found:
- self.found[dict["id"]].updateNumValues(dict.get('num', 0))
- self.handleGotNodes(dict['nodes'])
-
- def generateResult(self):
- """Result is the nodes that have values, sorted by proximity to the key."""
- self.sortNodes()
- return ([node for node in self.sorted_nodes if node.num_values > 0], )
-
-
-class GetValue(ActionBase):
- """Retrieve values from a list of nodes."""
-
- def __init__(self, caller, target, local_results, num_results, callback, config, action="getValue"):
- """Initialize the action with the locally available results.
-
- @type local_results: C{list} of C{string}
- @param local_results: the values that were available in this node
- """
- ActionBase.__init__(self, caller, target, callback, config, action, num_results)
- if local_results:
- for result in local_results:
- self.results[result] = 1
-
- def getNodesToProcess(self):
- """Nodes are never added, always return the same sorted node list."""
- return self.sorted_nodes
-
- def generateArgs(self, node):
- """Arguments include the number of values to request."""
- if node.num_values > 0:
- # Request all desired results from each node, just to be sure.
- num_values = abs(self.desired_results) - len(self.results)
- assert num_values > 0
- if num_values > node.num_values:
- num_values = 0
- return (self.target, num_values), node.num_values
- else:
- raise ValueError, "Don't try to get values from this node because it doesn't have any"
-
- def processResponse(self, dict):
- """Save the returned values, calling the L{callback} each time there are new ones."""
- if dict.has_key('values'):
- def x(y, z=self.results):
- if not z.has_key(y):
- z[y] = 1
- return y
- else:
- return None
- z = len(dict['values'])
- v = filter(None, map(x, dict['values']))
- if len(v):
- reactor.callLater(0, self.callback, self.target, v)
-
- def generateResult(self):
- """Results have all been returned, now send the empty list to end the action."""
- return (self.target, [])
-
-
-class StoreValue(ActionBase):
- """Store a value in a list of nodes."""
-
- def __init__(self, caller, target, value, num_results, callback, config, action="storeValue"):
- """Initialize the action with the value to store.
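-
- A hedged sketch of how the caller constructs this action (the pattern
- used by L{khashmir.KhashmirWrite.storeValueForKey} later in this
- changeset)::
-
- action = StoreValue(self, key, value, self.config['STORE_REDUNDANCY'], response, self.config)
- reactor.callLater(0, action.goWithNodes, nodes)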
-
- @type value: C{string}
- @param value: the value to store in the nodes
- """
- ActionBase.__init__(self, caller, target, callback, config, action, num_results)
- self.value = value
-
- def getNodesToProcess(self):
- """Nodes are never added, always return the same sorted list."""
- return self.sorted_nodes
-
- def generateArgs(self, node):
- """Args include the value to store and the node's token."""
- if node.token:
- return (self.target, self.value, node.token), 1
- else:
- raise ValueError, "Don't store at this node since we don't know its token"
-
- def processResponse(self, dict):
- """Save the response, though it should be nothing but the ID."""
- self.results[dict["id"]] = dict
-
- def generateResult(self):
- """Return all the response IDs received."""
- return (self.target, self.value, self.results.values())
diff --git a/apt_dht_Khashmir/bencode.py b/apt_dht_Khashmir/bencode.py
deleted file mode 100644
index 06a64e7..0000000
--- a/apt_dht_Khashmir/bencode.py
+++ /dev/null
@@ -1,480 +0,0 @@
-
-"""Functions for bencoding and bdecoding data.
-
-@type decode_func: C{dictionary} of C{function}
-@var decode_func: a dictionary of function calls to be made, based on data,
- the keys are the first character of the data and the value is the
- function to use to decode that data
-@type bencached_marker: C{list}
-@var bencached_marker: mutable type to ensure class origination
-@type encode_func: C{dictionary} of C{function}
-@var encode_func: a dictionary of function calls to be made, based on data,
- the keys are the type of the data and the value is the
- function to use to encode that data
-@type BencachedType: C{type}
-@var BencachedType: the L{Bencached} type
-"""
-
-from types import IntType, LongType, StringType, ListType, TupleType, DictType, BooleanType
-try:
- from types import UnicodeType
-except ImportError:
- UnicodeType = None
-from datetime import datetime
-import time
-
-from twisted.python import log
-from twisted.trial import unittest
-
-class BencodeError(ValueError):
- pass
-
-def decode_int(x, f):
- """Bdecode an integer.
-
- @type x: C{string}
- @param x: the data to decode
- @type f: C{int}
- @param f: the offset in the data to start at
- @rtype: C{int}, C{int}
- @return: the bdecoded integer, and the offset to read next
- @raise BencodeError: if the data is improperly encoded
-
- """
-
- f += 1
- newf = x.index('e', f)
- try:
- n = int(x[f:newf])
- except:
- n = long(x[f:newf])
- if x[f] == '-':
- if x[f + 1] == '0':
- raise BencodeError, "integer has a leading zero after a negative sign"
- elif x[f] == '0' and newf != f+1:
- raise BencodeError, "integer has a leading zero"
- return (n, newf+1)
-
-def decode_string(x, f):
- """Bdecode a string.
-
- @type x: C{string}
- @param x: the data to decode
- @type f: C{int}
- @param f: the offset in the data to start at
- @rtype: C{string}, C{int}
- @return: the bdecoded string, and the offset to read next
- @raise BencodeError: if the data is improperly encoded
-
- """
-
- colon = x.index(':', f)
- try:
- n = int(x[f:colon])
- except (OverflowError, ValueError):
- n = long(x[f:colon])
- if x[f] == '0' and colon != f+1:
- raise BencodeError, "string length has a leading zero"
- colon += 1
- return (x[colon:colon+n], colon+n)
-
-def decode_unicode(x, f):
- """Bdecode a unicode string.
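-
- For example (an illustrative doctest-style call; the offset points at
- the 'u' marker)::
-
- >>> decode_unicode('u3:abc', 0)
- (u'abc', 6)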
-
- @type x: C{string}
- @param x: the data to decode
- @type f: C{int}
- @param f: the offset in the data to start at
- @rtype: C{unicode}, C{int}
- @return: the bdecoded unicode string, and the offset to read next
-
- """
-
- s, f = decode_string(x, f+1)
- return (s.decode('UTF-8'),f)
-
-def decode_datetime(x, f):
- """Bdecode a datetime value.
-
- @type x: C{string}
- @param x: the data to decode
- @type f: C{int}
- @param f: the offset in the data to start at
- @rtype: C{datetime.datetime}, C{int}
- @return: the bdecoded datetime value, and the offset to read next
- @raise BencodeError: if the data is improperly encoded
-
- """
-
- f += 1
- newf = x.index('e', f)
- try:
- date = datetime(*(time.strptime(x[f:newf], '%Y-%m-%dT%H:%M:%S')[0:6]))
- except:
- raise BencodeError, "datetime value could not be decoded: %s" % x[f:newf]
- return (date, newf+1)
-
-def decode_list(x, f):
- """Bdecode a list.
-
- @type x: C{string}
- @param x: the data to decode
- @type f: C{int}
- @param f: the offset in the data to start at
- @rtype: C{list}, C{int}
- @return: the bdecoded list, and the offset to read next
-
- """
-
- r, f = [], f+1
- while x[f] != 'e':
- v, f = decode_func[x[f]](x, f)
- r.append(v)
- return (r, f + 1)
-
-def decode_dict(x, f):
- """Bdecode a dictionary.
-
- @type x: C{string}
- @param x: the data to decode
- @type f: C{int}
- @param f: the offset in the data to start at
- @rtype: C{dictionary}, C{int}
- @return: the bdecoded dictionary, and the offset to read next
- @raise BencodeError: if the data is improperly encoded
-
- """
-
- r, f = {}, f+1
- lastkey = None
- while x[f] != 'e':
- k, f = decode_string(x, f)
- if lastkey >= k:
- raise BencodeError, "dictionary keys must be in sorted order"
- lastkey = k
- r[k], f = decode_func[x[f]](x, f)
- return (r, f + 1)
-
-decode_func = {}
-decode_func['l'] = decode_list
-decode_func['d'] = decode_dict
-decode_func['i'] = decode_int
-decode_func['0'] = decode_string
-decode_func['1'] = decode_string
-decode_func['2'] = decode_string
-decode_func['3'] = decode_string
-decode_func['4'] = decode_string
-decode_func['5'] = decode_string
-decode_func['6'] = decode_string
-decode_func['7'] = decode_string
-decode_func['8'] = decode_string
-decode_func['9'] = decode_string
-decode_func['u'] = decode_unicode
-decode_func['t'] = decode_datetime
-
-def bdecode(x, sloppy = False):
- """Bdecode a string of data.
-
- @type x: C{string}
- @param x: the data to decode
- @type sloppy: C{boolean}
- @param sloppy: whether to allow errors in the decoding
- @rtype: unknown
- @return: the bdecoded data
- @raise BencodeError: if the data is improperly encoded
-
- """
-
- try:
- r, l = decode_func[x[0]](x, 0)
-# except (IndexError, KeyError):
- except (IndexError, KeyError, ValueError):
- raise BencodeError, "bad bencoded data"
- if not sloppy and l != len(x):
- raise BencodeError, "bad bencoded data, all could not be decoded"
- return r
-
-bencached_marker = []
-
-class Bencached(object):
- """Dummy data structure for storing bencoded data in memory.
-
- @type marker: C{list}
- @ivar marker: mutable type to make sure the data was encoded by this class
- @type bencoded: C{string}
- @ivar bencoded: the bencoded data stored in a string
-
- """
-
- def __init__(self, s):
- """
-
- @type s: C{string}
- @param s: the new bencoded data to store
-
- """
-
- self.marker = bencached_marker
- self.bencoded = s
-
-BencachedType = type(Bencached('')) # insufficient, but good as a filter
-
-def encode_bencached(x,r):
- """Bencode L{Bencached} data.
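-
- A minimal sketch of the intended use (illustrative; C{some_value} is any
- encodable object)::
-
- cached = Bencached(bencode(some_value))
- bencode([cached, cached]) # appends the pre-encoded string directly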
- - @type x: L{Bencached} - @param x: the data to encode - @type r: C{list} - @param r: the currently bencoded data, to which the bencoding of x - will be appended - - """ - - assert x.marker == bencached_marker - r.append(x.bencoded) - -def encode_int(x,r): - """Bencode an integer. - - @type x: C{int} - @param x: the data to encode - @type r: C{list} - @param r: the currently bencoded data, to which the bencoding of x - will be appended - - """ - - r.extend(('i',str(x),'e')) - -def encode_bool(x,r): - """Bencode a boolean. - - @type x: C{boolean} - @param x: the data to encode - @type r: C{list} - @param r: the currently bencoded data, to which the bencoding of x - will be appended - - """ - - encode_int(int(x),r) - -def encode_string(x,r): - """Bencode a string. - - @type x: C{string} - @param x: the data to encode - @type r: C{list} - @param r: the currently bencoded data, to which the bencoding of x - will be appended - - """ - - r.extend((str(len(x)),':',x)) - -def encode_unicode(x,r): - """Bencode a unicode string. - - @type x: C{unicode} - @param x: the data to encode - @type r: C{list} - @param r: the currently bencoded data, to which the bencoding of x - will be appended - - """ - - #r.append('u') - encode_string(x.encode('UTF-8'),r) - -def encode_datetime(x,r): - """Bencode a datetime value in UTC. - - If the datetime object has time zone info, it is converted to UTC time. - Otherwise it is assumed that the time is already in UTC time. - Microseconds are removed. - - @type x: C{datetime.datetime} - @param x: the data to encode - @type r: C{list} - @param r: the currently bencoded data, to which the bencoding of x - will be appended - - """ - - date = x.replace(microsecond = 0) - offset = date.utcoffset() - if offset is not None: - utcdate = date.replace(tzinfo = None) + offset - else: - utcdate = date - r.extend(('t',utcdate.isoformat(),'e')) - -def encode_list(x,r): - """Bencode a list. - - @type x: C{list} - @param x: the data to encode - @type r: C{list} - @param r: the currently bencoded data, to which the bencoding of x - will be appended - - """ - - r.append('l') - for e in x: - encode_func[type(e)](e, r) - r.append('e') - -def encode_dict(x,r): - """Bencode a dictionary. - - @type x: C{dictionary} - @param x: the data to encode - @type r: C{list} - @param r: the currently bencoded data, to which the bencoding of x - will be appended - - """ - - r.append('d') - ilist = x.items() - ilist.sort() - for k,v in ilist: - r.extend((str(len(k)),':',k)) - encode_func[type(v)](v, r) - r.append('e') - -encode_func = {} -encode_func[BencachedType] = encode_bencached -encode_func[IntType] = encode_int -encode_func[LongType] = encode_int -encode_func[StringType] = encode_string -encode_func[ListType] = encode_list -encode_func[TupleType] = encode_list -encode_func[DictType] = encode_dict -encode_func[BooleanType] = encode_bool -encode_func[datetime] = encode_datetime -if UnicodeType: - encode_func[UnicodeType] = encode_unicode - -def bencode(x): - """Bencode some data. 
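-
- For example (the same values exercised by the tests below)::
-
- >>> bencode({'age': 25, 'eyes': 'blue'})
- 'd3:agei25e4:eyes4:bluee'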
- - @type x: unknown - @param x: the data to encode - @rtype: string - @return: the bencoded data - @raise BencodeError: if the data contains a type that cannot be encoded - - """ - r = [] - try: - encode_func[type(x)](x, r) - except: - raise BencodeError, "failed to bencode the data" - return ''.join(r) - -class TestBencode(unittest.TestCase): - """Test the bencoding and bdecoding of data.""" - - timeout = 2 - - def test_bdecode_string(self): - self.failUnlessRaises(BencodeError, bdecode, '0:0:') - self.failUnlessRaises(BencodeError, bdecode, '') - self.failUnlessRaises(BencodeError, bdecode, '35208734823ljdahflajhdf') - self.failUnlessRaises(BencodeError, bdecode, '2:abfdjslhfld') - self.failUnlessEqual(bdecode('0:'), '') - self.failUnlessEqual(bdecode('3:abc'), 'abc') - self.failUnlessEqual(bdecode('10:1234567890'), '1234567890') - self.failUnlessRaises(BencodeError, bdecode, '02:xy') - self.failUnlessRaises(BencodeError, bdecode, '9999:x') - - def test_bdecode_int(self): - self.failUnlessRaises(BencodeError, bdecode, 'ie') - self.failUnlessRaises(BencodeError, bdecode, 'i341foo382e') - self.failUnlessEqual(bdecode('i4e'), 4L) - self.failUnlessEqual(bdecode('i0e'), 0L) - self.failUnlessEqual(bdecode('i123456789e'), 123456789L) - self.failUnlessEqual(bdecode('i-10e'), -10L) - self.failUnlessRaises(BencodeError, bdecode, 'i-0e') - self.failUnlessRaises(BencodeError, bdecode, 'i123') - self.failUnlessRaises(BencodeError, bdecode, 'i6easd') - self.failUnlessRaises(BencodeError, bdecode, 'i03e') - - def test_bdecode_list(self): - self.failUnlessRaises(BencodeError, bdecode, 'l') - self.failUnlessEqual(bdecode('le'), []) - self.failUnlessRaises(BencodeError, bdecode, 'leanfdldjfh') - self.failUnlessEqual(bdecode('l0:0:0:e'), ['', '', '']) - self.failUnlessRaises(BencodeError, bdecode, 'relwjhrlewjh') - self.failUnlessEqual(bdecode('li1ei2ei3ee'), [1, 2, 3]) - self.failUnlessEqual(bdecode('l3:asd2:xye'), ['asd', 'xy']) - self.failUnlessEqual(bdecode('ll5:Alice3:Bobeli2ei3eee'), [['Alice', 'Bob'], [2, 3]]) - self.failUnlessRaises(BencodeError, bdecode, 'l01:ae') - self.failUnlessRaises(BencodeError, bdecode, 'l0:') - - def test_bdecode_dict(self): - self.failUnlessRaises(BencodeError, bdecode, 'd') - self.failUnlessRaises(BencodeError, bdecode, 'defoobar') - self.failUnlessEqual(bdecode('de'), {}) - self.failUnlessEqual(bdecode('d3:agei25e4:eyes4:bluee'), {'age': 25, 'eyes': 'blue'}) - self.failUnlessEqual(bdecode('d8:spam.mp3d6:author5:Alice6:lengthi100000eee'), - {'spam.mp3': {'author': 'Alice', 'length': 100000}}) - self.failUnlessRaises(BencodeError, bdecode, 'd3:fooe') - self.failUnlessRaises(BencodeError, bdecode, 'di1e0:e') - self.failUnlessRaises(BencodeError, bdecode, 'd1:b0:1:a0:e') - self.failUnlessRaises(BencodeError, bdecode, 'd1:a0:1:a0:e') - self.failUnlessRaises(BencodeError, bdecode, 'd0:0:') - self.failUnlessRaises(BencodeError, bdecode, 'd0:') - - def test_bdecode_unicode(self): - self.failUnlessRaises(BencodeError, bdecode, 'u0:0:') - self.failUnlessRaises(BencodeError, bdecode, 'u') - self.failUnlessRaises(BencodeError, bdecode, 'u35208734823ljdahflajhdf') - self.failUnlessRaises(BencodeError, bdecode, 'u2:abfdjslhfld') - self.failUnlessEqual(bdecode('u0:'), '') - self.failUnlessEqual(bdecode('u3:abc'), 'abc') - self.failUnlessEqual(bdecode('u10:1234567890'), '1234567890') - self.failUnlessRaises(BencodeError, bdecode, 'u02:xy') - self.failUnlessRaises(BencodeError, bdecode, 'u9999:x') - - def test_bencode_int(self): - self.failUnlessEqual(bencode(4), 'i4e') - 
self.failUnlessEqual(bencode(0), 'i0e') - self.failUnlessEqual(bencode(-10), 'i-10e') - self.failUnlessEqual(bencode(12345678901234567890L), 'i12345678901234567890e') - - def test_bencode_string(self): - self.failUnlessEqual(bencode(''), '0:') - self.failUnlessEqual(bencode('abc'), '3:abc') - self.failUnlessEqual(bencode('1234567890'), '10:1234567890') - - def test_bencode_list(self): - self.failUnlessEqual(bencode([]), 'le') - self.failUnlessEqual(bencode([1, 2, 3]), 'li1ei2ei3ee') - self.failUnlessEqual(bencode([['Alice', 'Bob'], [2, 3]]), 'll5:Alice3:Bobeli2ei3eee') - - def test_bencode_dict(self): - self.failUnlessEqual(bencode({}), 'de') - self.failUnlessEqual(bencode({'age': 25, 'eyes': 'blue'}), 'd3:agei25e4:eyes4:bluee') - self.failUnlessEqual(bencode({'spam.mp3': {'author': 'Alice', 'length': 100000}}), - 'd8:spam.mp3d6:author5:Alice6:lengthi100000eee') - self.failUnlessRaises(BencodeError, bencode, {1: 'foo'}) - - def test_bencode_unicode(self): - self.failUnlessEqual(bencode(u''), '0:') - self.failUnlessEqual(bencode(u'abc'), '3:abc') - self.failUnlessEqual(bencode(u'1234567890'), '10:1234567890') - - def test_bool(self): - self.failUnless(bdecode(bencode(True))) - self.failIf(bdecode(bencode(False))) - - def test_datetime(self): - date = datetime.utcnow() - self.failUnlessEqual(bdecode(bencode(date)), date.replace(microsecond = 0)) - - if UnicodeType == None: - test_bencode_unicode.skip = "Python was not compiled with unicode support" - test_bdecode_unicode.skip = "Python was not compiled with unicode support" diff --git a/apt_dht_Khashmir/db.py b/apt_dht_Khashmir/db.py deleted file mode 100644 index 47e974c..0000000 --- a/apt_dht_Khashmir/db.py +++ /dev/null @@ -1,206 +0,0 @@ - -"""An sqlite database for storing nodes and key/value pairs.""" - -from datetime import datetime, timedelta -from pysqlite2 import dbapi2 as sqlite -from binascii import a2b_base64, b2a_base64 -from time import sleep -import os - -from twisted.trial import unittest - -class DBExcept(Exception): - pass - -class khash(str): - """Dummy class to convert all hashes to base64 for storing in the DB.""" - -class dht_value(str): - """Dummy class to convert all DHT values to base64 for storing in the DB.""" - -# Initialize the database to work with 'khash' objects (binary strings) -sqlite.register_adapter(khash, b2a_base64) -sqlite.register_converter("KHASH", a2b_base64) -sqlite.register_converter("khash", a2b_base64) - -# Initialize the database to work with DHT values (binary strings) -sqlite.register_adapter(dht_value, b2a_base64) -sqlite.register_converter("DHT_VALUE", a2b_base64) -sqlite.register_converter("dht_value", a2b_base64) - -class DB: - """An sqlite database for storing persistent node info and key/value pairs. - - @type db: C{string} - @ivar db: the database file to use - @type conn: L{pysqlite2.dbapi2.Connection} - @ivar conn: an open connection to the sqlite database - """ - - def __init__(self, db): - """Load or create the database file. 
-
- @type db: C{string}
- @param db: the database file to use
- """
- self.db = db
- try:
- os.stat(db)
- except OSError:
- self._createNewDB(db)
- else:
- self._loadDB(db)
- if sqlite.version_info < (2, 1):
- sqlite.register_converter("TEXT", str)
- sqlite.register_converter("text", str)
- else:
- self.conn.text_factory = str
-
- #{ Loading the DB
- def _loadDB(self, db):
- """Open a new connection to the existing database file."""
- try:
- self.conn = sqlite.connect(database=db, detect_types=sqlite.PARSE_DECLTYPES)
- except:
- import traceback
- raise DBExcept, "Couldn't open DB: %s" % traceback.format_exc()
-
- def _createNewDB(self, db):
- """Open a connection to a new database and create the necessary tables."""
- self.conn = sqlite.connect(database=db, detect_types=sqlite.PARSE_DECLTYPES)
- c = self.conn.cursor()
- c.execute("CREATE TABLE kv (key KHASH, value DHT_VALUE, last_refresh TIMESTAMP, "+
- "PRIMARY KEY (key, value))")
- c.execute("CREATE INDEX kv_key ON kv(key)")
- c.execute("CREATE INDEX kv_last_refresh ON kv(last_refresh)")
- c.execute("CREATE TABLE nodes (id KHASH PRIMARY KEY, host TEXT, port NUMBER)")
- c.execute("CREATE TABLE self (num NUMBER PRIMARY KEY, id KHASH)")
- self.conn.commit()
-
- def close(self):
- self.conn.close()
-
- #{ This node's ID
- def getSelfNode(self):
- """Retrieve this node's ID from a previous run of the program."""
- c = self.conn.cursor()
- c.execute('SELECT id FROM self WHERE num = 0')
- id = c.fetchone()
- if id:
- return id[0]
- else:
- return None
-
- def saveSelfNode(self, id):
- """Store this node's ID for a subsequent run of the program."""
- c = self.conn.cursor()
- c.execute("INSERT OR REPLACE INTO self VALUES (0, ?)", (khash(id),))
- self.conn.commit()
-
- #{ Routing table
- def dumpRoutingTable(self, buckets):
- """Save routing table nodes to the database."""
- c = self.conn.cursor()
- c.execute("DELETE FROM nodes WHERE id NOT NULL")
- for bucket in buckets:
- for node in bucket.l:
- c.execute("INSERT INTO nodes VALUES (?, ?, ?)", (khash(node.id), node.host, node.port))
- self.conn.commit()
-
- def getRoutingTable(self):
- """Load routing table nodes from database."""
- c = self.conn.cursor()
- c.execute("SELECT * FROM nodes")
- return c.fetchall()
-
- #{ Key/value pairs
- def retrieveValues(self, key):
- """Retrieve values from the database."""
- c = self.conn.cursor()
- c.execute("SELECT value FROM kv WHERE key = ?", (khash(key),))
- l = []
- rows = c.fetchall()
- for row in rows:
- l.append(row[0])
- return l
-
- def countValues(self, key):
- """Count the number of values stored for a key in the database."""
- c = self.conn.cursor()
- c.execute("SELECT COUNT(value) as num_values FROM kv WHERE key = ?", (khash(key),))
- res = 0
- row = c.fetchone()
- if row:
- res = row[0]
- return res
-
- def storeValue(self, key, value):
- """Store or update a key and value."""
- c = self.conn.cursor()
- c.execute("INSERT OR REPLACE INTO kv VALUES (?, ?, ?)",
- (khash(key), dht_value(value), datetime.now()))
- self.conn.commit()
-
- def expireValues(self, expireAfter):
- """Expire older values after expireAfter seconds."""
- t = datetime.now() - timedelta(seconds=expireAfter)
- c = self.conn.cursor()
- c.execute("DELETE FROM kv WHERE last_refresh < ?", (t, ))
- self.conn.commit()
-
-class TestDB(unittest.TestCase):
- """Tests for the khashmir database."""
-
- timeout = 5
- db = '/tmp/khashmir.db'
- key = '\xca\xec\xb8\x0c\x00\xe7\x07\xf8~])\x8f\x9d\xe5_B\xff\x1a\xc4!'
-
- def setUp(self):
- self.store = DB(self.db)
-
- def test_selfNode(self):
- self.store.saveSelfNode(self.key)
- self.failUnlessEqual(self.store.getSelfNode(), self.key)
-
- def test_Value(self):
- self.store.storeValue(self.key, self.key)
- val = self.store.retrieveValues(self.key)
- self.failUnlessEqual(len(val), 1)
- self.failUnlessEqual(val[0], self.key)
-
- def test_expireValues(self):
- self.store.storeValue(self.key, self.key)
- sleep(2)
- self.store.storeValue(self.key, self.key+self.key)
- self.store.expireValues(1)
- val = self.store.retrieveValues(self.key)
- self.failUnlessEqual(len(val), 1)
- self.failUnlessEqual(val[0], self.key+self.key)
-
- def test_RoutingTable(self):
- class dummy:
- id = self.key
- host = "127.0.0.1"
- port = 9977
- def contents(self):
- return (self.id, self.host, self.port)
- dummy2 = dummy()
- dummy2.id = '\xaa\xbb\xcc\x0c\x00\xe7\x07\xf8~])\x8f\x9d\xe5_B\xff\x1a\xc4!'
- dummy2.host = '205.23.67.124'
- dummy2.port = 12345
- class bl:
- def __init__(self):
- self.l = []
- bl1 = bl()
- bl1.l.append(dummy())
- bl2 = bl()
- bl2.l.append(dummy2)
- buckets = [bl1, bl2]
- self.store.dumpRoutingTable(buckets)
- rt = self.store.getRoutingTable()
- self.failUnlessIn(dummy().contents(), rt)
- self.failUnlessIn(dummy2.contents(), rt)
-
- def tearDown(self):
- self.store.close()
- os.unlink(self.db)
diff --git a/apt_dht_Khashmir/khash.py b/apt_dht_Khashmir/khash.py
deleted file mode 100644
index 91db232..0000000
--- a/apt_dht_Khashmir/khash.py
+++ /dev/null
@@ -1,103 +0,0 @@
-## Copyright 2002-2003 Andrew Loewenstern, All Rights Reserved
-# see LICENSE.txt for license information
-
-"""Functions to deal with hashes (node IDs and keys)."""
-
-from sha import sha
-from os import urandom
-
-from twisted.trial import unittest
-
-def intify(hstr):
- """Convert a hash (big-endian) to a long python integer."""
- assert len(hstr) == 20
- return long(hstr.encode('hex'), 16)
-
-def stringify(num):
- """Convert a long python integer to a hash."""
- str = hex(num)[2:]
- if str[-1] == 'L':
- str = str[:-1]
- if len(str) % 2 != 0:
- str = '0' + str
- str = str.decode('hex')
- return (20 - len(str)) *'\x00' + str
-
-def distance(a, b):
- """Calculate the distance between two hashes expressed as strings."""
- return intify(a) ^ intify(b)
-
-def newID():
- """Get a new pseudorandom globally unique hash string."""
- h = sha()
- h.update(urandom(20))
- return h.digest()
-
-def newIDInRange(min, max):
- """Get a new pseudorandom globally unique hash string in the range."""
- return stringify(randRange(min,max))
-
-def randRange(min, max):
- """Get a new pseudorandom globally unique hash number in the range."""
- return min + intify(newID()) % (max - min)
-
-def newTID():
- """Get a new pseudorandom transaction ID number."""
- return randRange(-2**30, 2**30)
-
-class TestNewID(unittest.TestCase):
- """Test the newID function."""
- def testLength(self):
- self.failUnlessEqual(len(newID()), 20)
- def testHundreds(self):
- for x in xrange(100):
- self.testLength()
-
-class TestIntify(unittest.TestCase):
- """Test the intify function."""
- known = [('\0' * 20, 0),
- ('\xff' * 20, 2L**160 - 1),
- ]
- def testKnown(self):
- for str, value in self.known:
- self.failUnlessEqual(intify(str), value)
- def testEndiannessOnce(self):
- h = newID()
- while h[-1] == '\xff':
- h = newID()
- k = h[:-1] + chr(ord(h[-1]) + 1)
- self.failUnlessEqual(intify(k) - intify(h), 1)
- def testEndiannessLots(self):
- for x in xrange(100):
- self.testEndiannessOnce()
-
-class TestDistance(unittest.TestCase):
"""Test the distance function.""" - known = [ - (("\0" * 20, "\xff" * 20), 2**160L -1), - ((sha("foo").digest(), sha("foo").digest()), 0), - ((sha("bar").digest(), sha("bar").digest()), 0) - ] - def testKnown(self): - for pair, dist in self.known: - self.failUnlessEqual(distance(pair[0], pair[1]), dist) - def testCommutitive(self): - for i in xrange(100): - x, y, z = newID(), newID(), newID() - self.failUnlessEqual(distance(x,y) ^ distance(y, z), distance(x, z)) - -class TestRandRange(unittest.TestCase): - """Test the randRange function.""" - def testOnce(self): - a = intify(newID()) - b = intify(newID()) - if a < b: - c = randRange(a, b) - self.failUnlessEqual(a <= c < b, True, "output out of range %d %d %d" % (b, c, a)) - else: - c = randRange(b, a) - self.failUnlessEqual(b <= c < a, True, "output out of range %d %d %d" % (b, c, a)) - - def testOneHundredTimes(self): - for i in xrange(100): - self.testOnce() diff --git a/apt_dht_Khashmir/khashmir.py b/apt_dht_Khashmir/khashmir.py deleted file mode 100644 index 126a30e..0000000 --- a/apt_dht_Khashmir/khashmir.py +++ /dev/null @@ -1,666 +0,0 @@ -## Copyright 2002-2004 Andrew Loewenstern, All Rights Reserved -# see LICENSE.txt for license information - -"""The main Khashmir program.""" - -import warnings -warnings.simplefilter("ignore", DeprecationWarning) - -from datetime import datetime, timedelta -from random import randrange, shuffle -from sha import sha -import os - -from twisted.internet.defer import Deferred -from twisted.internet import protocol, reactor -from twisted.trial import unittest - -from db import DB -from ktable import KTable -from knode import KNodeBase, KNodeRead, KNodeWrite, NULL_ID -from khash import newID, newIDInRange -from actions import FindNode, FindValue, GetValue, StoreValue -import krpc - -class KhashmirBase(protocol.Factory): - """The base Khashmir class, with base functionality and find node, no key-value mappings. - - @type _Node: L{node.Node} - @ivar _Node: the knode implementation to use for this class of DHT - @type config: C{dictionary} - @ivar config: the configuration parameters for the DHT - @type port: C{int} - @ivar port: the port to listen on - @type store: L{db.DB} - @ivar store: the database to store nodes and key/value pairs in - @type node: L{node.Node} - @ivar node: this node - @type table: L{ktable.KTable} - @ivar table: the routing table - @type token_secrets: C{list} of C{string} - @ivar token_secrets: the current secrets to use to create tokens - @type udp: L{krpc.hostbroker} - @ivar udp: the factory for the KRPC protocol - @type listenport: L{twisted.internet.interfaces.IListeningPort} - @ivar listenport: the UDP listening port - @type next_checkpoint: L{twisted.internet.interfaces.IDelayedCall} - @ivar next_checkpoint: the delayed call for the next checkpoint - """ - - _Node = KNodeBase - - def __init__(self, config, cache_dir='/tmp'): - """Initialize the Khashmir class and call the L{setup} method. - - @type config: C{dictionary} - @param config: the configuration parameters for the DHT - @type cache_dir: C{string} - @param cache_dir: the directory to store all files in - (optional, defaults to the /tmp directory) - """ - self.config = None - self.setup(config, cache_dir) - - def setup(self, config, cache_dir): - """Setup all the Khashmir sub-modules. 
-
- @type config: C{dictionary}
- @param config: the configuration parameters for the DHT
- @type cache_dir: C{string}
- @param cache_dir: the directory to store all files in
- """
- self.config = config
- self.port = config['PORT']
- self.store = DB(os.path.join(cache_dir, 'khashmir.' + str(self.port) + '.db'))
- self.node = self._loadSelfNode('', self.port)
- self.table = KTable(self.node, config)
- self.token_secrets = [newID()]
-
- # Start listening
- self.udp = krpc.hostbroker(self, config)
- self.udp.protocol = krpc.KRPC
- self.listenport = reactor.listenUDP(self.port, self.udp)
-
- # Load the routing table and begin checkpointing
- self._loadRoutingTable()
- self.refreshTable(force = True)
- self.next_checkpoint = reactor.callLater(60, self.checkpoint)
-
- def Node(self, id, host = None, port = None):
- """Create a new node.
-
- @see: L{node.Node.__init__}
- """
- n = self._Node(id, host, port)
- n.table = self.table
- n.conn = self.udp.connectionForAddr((n.host, n.port))
- return n
-
- def __del__(self):
- """Stop listening for packets."""
- self.listenport.stopListening()
-
- def _loadSelfNode(self, host, port):
- """Create this node, loading any previously saved one."""
- id = self.store.getSelfNode()
- if not id:
- id = newID()
- return self._Node(id, host, port)
-
- def checkpoint(self):
- """Perform some periodic maintenance operations."""
- # Create a new token secret
- self.token_secrets.insert(0, newID())
- if len(self.token_secrets) > 3:
- self.token_secrets.pop()
-
- # Save some parameters for reloading
- self.store.saveSelfNode(self.node.id)
- self.store.dumpRoutingTable(self.table.buckets)
-
- # DHT maintenance
- self.store.expireValues(self.config['KEY_EXPIRE'])
- self.refreshTable()
-
- self.next_checkpoint = reactor.callLater(randrange(int(self.config['CHECKPOINT_INTERVAL'] * .9),
- int(self.config['CHECKPOINT_INTERVAL'] * 1.1)),
- self.checkpoint)
-
- def _loadRoutingTable(self):
- """Load the previous routing table nodes from the database.
-
- It's usually a good idea to call refreshTable(force = True) after
- loading the table.
- """
- nodes = self.store.getRoutingTable()
- for rec in nodes:
- n = self.Node(rec[0], rec[1], int(rec[2]))
- self.table.insertNode(n, contacted = False)
-
- #{ Local interface
- def addContact(self, host, port, callback=None, errback=None):
- """Ping this node and add the contact info to the table on pong.
-
- @type host: C{string}
- @param host: the IP address of the node to contact
- @type port: C{int}
- @param port: the port of the node to contact
- @type callback: C{method}
- @param callback: the method to call with the results, it must take 1
- parameter, the contact info returned by the node
- (optional, defaults to doing nothing with the results)
- @type errback: C{method}
- @param errback: the method to call if an error occurs
- (optional, defaults to calling the callback with None)
- """
- n = self.Node(NULL_ID, host, port)
- self.sendJoin(n, callback=callback, errback=errback)
-
- def findNode(self, id, callback, errback=None):
- """Find the contact info for the K closest nodes in the global table.
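-
- A minimal usage sketch (illustrative; C{khashmir} is a running instance
- and C{target_id} a 20-byte key, both hypothetical names)::
-
- def gotNodes(nodes):
- print [(n.host, n.port) for n in nodes]
- khashmir.findNode(target_id, gotNodes)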
-
- @type id: C{string}
- @param id: the target ID to find the K closest nodes of
- @type callback: C{method}
- @param callback: the method to call with the results, it must take 1
- parameter, the list of K closest nodes
- @type errback: C{method}
- @param errback: the method to call if an error occurs
- (optional, defaults to doing nothing when an error occurs)
- """
- # Get K nodes out of local table/cache
- nodes = self.table.findNodes(id)
- d = Deferred()
- if errback:
- d.addCallbacks(callback, errback)
- else:
- d.addCallback(callback)
-
- # If the target ID was found
- if len(nodes) == 1 and nodes[0].id == id:
- d.callback(nodes)
- else:
- # Start the finding nodes action
- state = FindNode(self, id, d.callback, self.config)
- reactor.callLater(0, state.goWithNodes, nodes)
-
- def insertNode(self, node, contacted = True):
- """Try to insert a node in our local table, pinging oldest contact if necessary.
-
- If all you have is a host/port, then use L{addContact}, which calls this
- method after receiving the PONG from the remote node. The reason for
- the separation is that we can't insert a node into the table without its
- node ID. That means of course the node passed into this method needs
- to be a properly formed Node object with a valid ID.
-
- @type node: L{node.Node}
- @param node: the new node to try to insert
- @type contacted: C{boolean}
- @param contacted: whether the new node is known to be good, i.e.
- responded to a request (optional, defaults to True)
- """
- old = self.table.insertNode(node, contacted=contacted)
- if (old and old.id != self.node.id and
- (datetime.now() - old.lastSeen) >
- timedelta(seconds=self.config['MIN_PING_INTERVAL'])):
-
- def _staleNodeHandler(oldnode = old, newnode = node):
- """The pinged node never responded, so replace it."""
- self.table.replaceStaleNode(oldnode, newnode)
-
- def _notStaleNodeHandler(dict, old=old):
- """Got a pong from the old node, so update it."""
- dict = dict['rsp']
- if dict['id'] == old.id:
- self.table.justSeenNode(old.id)
-
- # Bucket is full, check to see if old node is still available
- df = old.ping(self.node.id)
- df.addCallbacks(_notStaleNodeHandler, _staleNodeHandler)
-
- def sendJoin(self, node, callback=None, errback=None):
- """Join the DHT by pinging a bootstrap node.
-
- @type node: L{node.Node}
- @param node: the node to send the join to
- @type callback: C{method}
- @param callback: the method to call with the results, it must take 1
- parameter, the contact info returned by the node
- (optional, defaults to doing nothing with the results)
- @type errback: C{method}
- @param errback: the method to call if an error occurs
- (optional, defaults to calling the callback with None)
- """
-
- def _pongHandler(dict, node=node, self=self, callback=callback):
- """Node responded properly, callback with response."""
- n = self.Node(dict['rsp']['id'], dict['_krpc_sender'][0], dict['_krpc_sender'][1])
- self.insertNode(n)
- if callback:
- callback((dict['rsp']['ip_addr'], dict['rsp']['port']))
-
- def _defaultPong(err, node=node, table=self.table, callback=callback, errback=errback):
- """Error occurred, fail node and errback or callback with error."""
- table.nodeFailed(node)
- if errback:
- errback()
- elif callback:
- callback(None)
-
- df = node.join(self.node.id)
- df.addCallbacks(_pongHandler, _defaultPong)
-
- def findCloseNodes(self, callback=lambda a: None, errback = None):
- """Perform a findNode on the ID one away from our own.
-
- This will allow us to populate our table with nodes on our network
- closest to our own. This is called as soon as we start up with an
- empty table.
-
- @type callback: C{method}
- @param callback: the method to call with the results, it must take 1
- parameter, the list of K closest nodes
- (optional, defaults to doing nothing with the results)
- @type errback: C{method}
- @param errback: the method to call if an error occurs
- (optional, defaults to doing nothing when an error occurs)
- """
- id = self.node.id[:-1] + chr((ord(self.node.id[-1]) + 1) % 256)
- self.findNode(id, callback, errback)
-
- def refreshTable(self, force = False):
- """Check all the buckets for those that need refreshing.
-
- @param force: refresh all buckets regardless of last bucket access time
- (optional, defaults to False)
- """
- def callback(nodes):
- pass
-
- for bucket in self.table.buckets:
- if force or (datetime.now() - bucket.lastAccessed >
- timedelta(seconds=self.config['BUCKET_STALENESS'])):
- # Choose a random ID in the bucket and try to find it
- id = newIDInRange(bucket.min, bucket.max)
- self.findNode(id, callback)
-
- def stats(self):
- """Collect some statistics about the DHT.
-
- @rtype: (C{int}, C{int})
- @return: the number of contacts in our routing table, and the estimated
- number of nodes in the entire DHT
- """
- num_contacts = reduce(lambda a, b: a + len(b.l), self.table.buckets, 0)
- num_nodes = self.config['K'] * (2**(len(self.table.buckets) - 1))
- return (num_contacts, num_nodes)
-
- def shutdown(self):
- """Close the port and cancel pending later calls."""
- self.listenport.stopListening()
- try:
- self.next_checkpoint.cancel()
- except:
- pass
- self.store.close()
-
- #{ Remote interface
- def krpc_ping(self, id, _krpc_sender):
- """Pong with our ID.
-
- @type id: C{string}
- @param id: the node ID of the sender node
- @type _krpc_sender: (C{string}, C{int})
- @param _krpc_sender: the sender node's IP address and port
- """
- n = self.Node(id, _krpc_sender[0], _krpc_sender[1])
- self.insertNode(n, contacted = False)
-
- return {"id" : self.node.id}
-
- def krpc_join(self, id, _krpc_sender):
- """Add the node by responding with its address and port.
-
- @type id: C{string}
- @param id: the node ID of the sender node
- @type _krpc_sender: (C{string}, C{int})
- @param _krpc_sender: the sender node's IP address and port
- """
- n = self.Node(id, _krpc_sender[0], _krpc_sender[1])
- self.insertNode(n, contacted = False)
-
- return {"ip_addr" : _krpc_sender[0], "port" : _krpc_sender[1], "id" : self.node.id}
-
- def krpc_find_node(self, target, id, _krpc_sender):
- """Find the K closest nodes to the target in the local routing table.
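-
- The response takes the form (field values are illustrative placeholders)::
-
- {'nodes': <compact node contact strings>, 'token': <SHA1 digest>, 'id': <our node ID>}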
- - @type target: C{string} - @param target: the target ID to find nodes for - @type id: C{string} - @param id: the node ID of the sender node - @type _krpc_sender: (C{string}, C{int}) - @param _krpc_sender: the sender node's IP address and port - """ - n = self.Node(id, _krpc_sender[0], _krpc_sender[1]) - self.insertNode(n, contacted = False) - - nodes = self.table.findNodes(target) - nodes = map(lambda node: node.contactInfo(), nodes) - token = sha(self.token_secrets[0] + _krpc_sender[0]).digest() - return {"nodes" : nodes, "token" : token, "id" : self.node.id} - - -class KhashmirRead(KhashmirBase): - """The read-only Khashmir class, which can only retrieve (not store) key/value mappings.""" - - _Node = KNodeRead - - #{ Local interface - def findValue(self, key, callback, errback=None): - """Get the nodes that have values for the key from the global table. - - @type key: C{string} - @param key: the target key to find the values for - @type callback: C{method} - @param callback: the method to call with the results, it must take 1 - parameter, the list of nodes with values - @type errback: C{method} - @param errback: the method to call if an error occurs - (optional, defaults to doing nothing when an error occurs) - """ - # Get K nodes out of local table/cache - nodes = self.table.findNodes(key) - d = Deferred() - if errback: - d.addCallbacks(callback, errback) - else: - d.addCallback(callback) - - # Search for others starting with the locally found ones - state = FindValue(self, key, d.callback, self.config) - reactor.callLater(0, state.goWithNodes, nodes) - - def valueForKey(self, key, callback, searchlocal = True): - """Get the values found for key in global table. - - Callback will be called with a list of values for each peer that - returns unique values. The final callback will be an empty list. - - @type key: C{string} - @param key: the target key to get the values for - @type callback: C{method} - @param callback: the method to call with the results, it must take 2 - parameters: the key, and the values found - @type searchlocal: C{boolean} - @param searchlocal: whether to also look for any local values - """ - # Get any local values - if searchlocal: - l = self.store.retrieveValues(key) - if len(l) > 0: - reactor.callLater(0, callback, key, l) - else: - l = [] - - def _getValueForKey(nodes, key=key, local_values=l, response=callback, self=self): - """Use the found nodes to send requests for values to.""" - state = GetValue(self, key, local_values, self.config['RETRIEVE_VALUES'], response, self.config) - reactor.callLater(0, state.goWithNodes, nodes) - - # First lookup nodes that have values for the key - self.findValue(key, _getValueForKey) - - #{ Remote interface - def krpc_find_value(self, key, id, _krpc_sender): - """Find the number of values stored locally for the key, and the K closest nodes. - - @type key: C{string} - @param key: the target key to find the values and nodes for - @type id: C{string} - @param id: the node ID of the sender node - @type _krpc_sender: (C{string}, C{int}) - @param _krpc_sender: the sender node's IP address and port - """ - n = self.Node(id, _krpc_sender[0], _krpc_sender[1]) - self.insertNode(n, contacted = False) - - nodes = self.table.findNodes(key) - nodes = map(lambda node: node.contactInfo(), nodes) - num_values = self.store.countValues(key) - return {'nodes' : nodes, 'num' : num_values, "id": self.node.id} - - def krpc_get_value(self, key, num, id, _krpc_sender): - """Retrieve the values stored locally for the key. 
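-
- The response takes the form (illustrative placeholders)::
-
- {'values': <list of stored values>, 'id': <our node ID>}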
- - @type key: C{string} - @param key: the target key to retrieve the values for - @type num: C{int} - @param num: the maximum number of values to retrieve, or 0 to - retrieve all of them - @type id: C{string} - @param id: the node ID of the sender node - @type _krpc_sender: (C{string}, C{int}) - @param _krpc_sender: the sender node's IP address and port - """ - n = self.Node(id, _krpc_sender[0], _krpc_sender[1]) - self.insertNode(n, contacted = False) - - l = self.store.retrieveValues(key) - if num == 0 or num >= len(l): - return {'values' : l, "id": self.node.id} - else: - shuffle(l) - return {'values' : l[:num], "id": self.node.id} - - -class KhashmirWrite(KhashmirRead): - """The read-write Khashmir class, which can store and retrieve key/value mappings.""" - - _Node = KNodeWrite - - #{ Local interface - def storeValueForKey(self, key, value, callback=None): - """Stores the value for the key in the global table. - - No status in this implementation, peers respond but don't indicate - status of storing values. - - @type key: C{string} - @param key: the target key to store the value for - @type value: C{string} - @param value: the value to store with the key - @type callback: C{method} - @param callback: the method to call with the results, it must take 3 - parameters: the key, the value stored, and the result of the store - (optional, defaults to doing nothing with the results) - """ - def _storeValueForKey(nodes, key=key, value=value, response=callback, self=self): - """Use the returned K closest nodes to store the key at.""" - if not response: - def _storedValueHandler(key, value, sender): - """Default callback that does nothing.""" - pass - response = _storedValueHandler - action = StoreValue(self, key, value, self.config['STORE_REDUNDANCY'], response, self.config) - reactor.callLater(0, action.goWithNodes, nodes) - - # First find the K closest nodes to operate on. - self.findNode(key, _storeValueForKey) - - #{ Remote interface - def krpc_store_value(self, key, value, token, id, _krpc_sender): - """Store the value locally with the key. 
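-
- On success the response is simply C{{'id': <our node ID>}}; an invalid
- token raises a C{KRPC_ERROR_INVALID_TOKEN} error (see the token check
- below).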
- - @type key: C{string} - @param key: the target key to store the value for - @type value: C{string} - @param value: the value to store with the key - @param token: the token to confirm that this peer contacted us previously - @type id: C{string} - @param id: the node ID of the sender node - @type _krpc_sender: (C{string}, C{int}) - @param _krpc_sender: the sender node's IP address and port - """ - n = self.Node(id, _krpc_sender[0], _krpc_sender[1]) - self.insertNode(n, contacted = False) - for secret in self.token_secrets: - this_token = sha(secret + _krpc_sender[0]).digest() - if token == this_token: - self.store.storeValue(key, value) - return {"id" : self.node.id} - raise krpc.KrpcError, (krpc.KRPC_ERROR_INVALID_TOKEN, 'token is invalid, do a find_nodes to get a fresh one') - - -class Khashmir(KhashmirWrite): - """The default Khashmir class (currently the read-write L{KhashmirWrite}).""" - _Node = KNodeWrite - - -class SimpleTests(unittest.TestCase): - - timeout = 10 - DHT_DEFAULTS = {'PORT': 9977, 'K': 8, 'HASH_LENGTH': 160, - 'CHECKPOINT_INTERVAL': 300, 'CONCURRENT_REQS': 4, - 'STORE_REDUNDANCY': 3, 'RETRIEVE_VALUES': -10000, - 'MAX_FAILURES': 3, - 'MIN_PING_INTERVAL': 900,'BUCKET_STALENESS': 3600, - 'KEY_EXPIRE': 3600, 'SPEW': False, } - - def setUp(self): - d = self.DHT_DEFAULTS.copy() - d['PORT'] = 4044 - self.a = Khashmir(d) - d = self.DHT_DEFAULTS.copy() - d['PORT'] = 4045 - self.b = Khashmir(d) - - def tearDown(self): - self.a.shutdown() - self.b.shutdown() - os.unlink(self.a.store.db) - os.unlink(self.b.store.db) - - def testAddContact(self): - self.failUnlessEqual(len(self.a.table.buckets), 1) - self.failUnlessEqual(len(self.a.table.buckets[0].l), 0) - - self.failUnlessEqual(len(self.b.table.buckets), 1) - self.failUnlessEqual(len(self.b.table.buckets[0].l), 0) - - self.a.addContact('127.0.0.1', 4045) - reactor.iterate() - reactor.iterate() - reactor.iterate() - reactor.iterate() - - self.failUnlessEqual(len(self.a.table.buckets), 1) - self.failUnlessEqual(len(self.a.table.buckets[0].l), 1) - self.failUnlessEqual(len(self.b.table.buckets), 1) - self.failUnlessEqual(len(self.b.table.buckets[0].l), 1) - - def testStoreRetrieve(self): - self.a.addContact('127.0.0.1', 4045) - reactor.iterate() - reactor.iterate() - reactor.iterate() - reactor.iterate() - self.got = 0 - self.a.storeValueForKey(sha('foo').digest(), 'foobar') - reactor.iterate() - reactor.iterate() - reactor.iterate() - reactor.iterate() - reactor.iterate() - reactor.iterate() - self.a.valueForKey(sha('foo').digest(), self._cb) - reactor.iterate() - reactor.iterate() - reactor.iterate() - reactor.iterate() - reactor.iterate() - reactor.iterate() - reactor.iterate() - - def _cb(self, key, val): - if not val: - self.failUnlessEqual(self.got, 1) - elif 'foobar' in val: - self.got = 1 - - -class MultiTest(unittest.TestCase): - - timeout = 30 - num = 20 - DHT_DEFAULTS = {'PORT': 9977, 'K': 8, 'HASH_LENGTH': 160, - 'CHECKPOINT_INTERVAL': 300, 'CONCURRENT_REQS': 4, - 'STORE_REDUNDANCY': 3, 'RETRIEVE_VALUES': -10000, - 'MAX_FAILURES': 3, - 'MIN_PING_INTERVAL': 900,'BUCKET_STALENESS': 3600, - 'KEY_EXPIRE': 3600, 'SPEW': False, } - - def _done(self, val): - self.done = 1 - - def setUp(self): - self.l = [] - self.startport = 4088 - for i in range(self.num): - d = self.DHT_DEFAULTS.copy() - d['PORT'] = self.startport + i - self.l.append(Khashmir(d)) - reactor.iterate() - reactor.iterate() - - for i in self.l: - i.addContact('127.0.0.1', self.l[randrange(0,self.num)].port) - i.addContact('127.0.0.1', 
self.l[randrange(0,self.num)].port) - i.addContact('127.0.0.1', self.l[randrange(0,self.num)].port) - reactor.iterate() - reactor.iterate() - reactor.iterate() - - for i in self.l: - self.done = 0 - i.findCloseNodes(self._done) - while not self.done: - reactor.iterate() - for i in self.l: - self.done = 0 - i.findCloseNodes(self._done) - while not self.done: - reactor.iterate() - - def tearDown(self): - for i in self.l: - i.shutdown() - os.unlink(i.store.db) - - reactor.iterate() - - def testStoreRetrieve(self): - for i in range(10): - K = newID() - V = newID() - - for a in range(3): - self.done = 0 - def _scb(key, value, result): - self.done = 1 - self.l[randrange(0, self.num)].storeValueForKey(K, V, _scb) - while not self.done: - reactor.iterate() - - - def _rcb(key, val): - if not val: - self.done = 1 - self.failUnlessEqual(self.got, 1) - elif V in val: - self.got = 1 - for x in range(3): - self.got = 0 - self.done = 0 - self.l[randrange(0, self.num)].valueForKey(K, _rcb) - while not self.done: - reactor.iterate() diff --git a/apt_dht_Khashmir/knode.py b/apt_dht_Khashmir/knode.py deleted file mode 100644 index e7fb6b3..0000000 --- a/apt_dht_Khashmir/knode.py +++ /dev/null @@ -1,78 +0,0 @@ -## Copyright 2002-2004 Andrew Loewenstern, All Rights Reserved -# see LICENSE.txt for license information - -"""Represents a khashmir node in the DHT.""" - -from twisted.python import log - -from node import Node, NULL_ID - -class KNodeBase(Node): - """A basic node that can only be pinged and help find other nodes.""" - - def checkSender(self, dict): - """Check the sender's info to make sure it meets expectations.""" - try: - senderid = dict['rsp']['id'] - except KeyError: - log.msg("No peer id in response") - raise Exception, "No peer id in response." - else: - if self.id != NULL_ID and senderid != self.id: - log.msg("Got response from different node than expected.") - self.table.invalidateNode(self) - - return dict - - def errBack(self, err): - """Log an error that has occurred.""" - log.err(err) - return err - - def ping(self, id): - """Ping the node.""" - df = self.conn.sendRequest('ping', {"id":id}) - df.addErrback(self.errBack) - df.addCallback(self.checkSender) - return df - - def join(self, id): - """Use the node to bootstrap into the system.""" - df = self.conn.sendRequest('join', {"id":id}) - df.addErrback(self.errBack) - df.addCallback(self.checkSender) - return df - - def findNode(self, id, target): - """Request the nearest nodes to the target that the node knows about.""" - df = self.conn.sendRequest('find_node', {"target" : target, "id": id}) - df.addErrback(self.errBack) - df.addCallback(self.checkSender) - return df - -class KNodeRead(KNodeBase): - """More advanced node that can also find and send values.""" - - def findValue(self, id, key): - """Request the nearest nodes to the key that the node knows about.""" - df = self.conn.sendRequest('find_value', {"key" : key, "id" : id}) - df.addErrback(self.errBack) - df.addCallback(self.checkSender) - return df - - def getValue(self, id, key, num): - """Request the values that the node has for the key.""" - df = self.conn.sendRequest('get_value', {"key" : key, "num": num, "id" : id}) - df.addErrback(self.errBack) - df.addCallback(self.checkSender) - return df - -class KNodeWrite(KNodeRead): - """Most advanced node that can also store values.""" - - def storeValue(self, id, key, value, token): - """Store a value in the node.""" - df = self.conn.sendRequest('store_value', {"key" : key, "value" : value, "token" : token, "id": id}) - 
df.addErrback(self.errBack) - df.addCallback(self.checkSender) - return df diff --git a/apt_dht_Khashmir/krpc.py b/apt_dht_Khashmir/krpc.py deleted file mode 100644 index a4fbacc..0000000 --- a/apt_dht_Khashmir/krpc.py +++ /dev/null @@ -1,561 +0,0 @@ -## Copyright 2002-2003 Andrew Loewenstern, All Rights Reserved -# see LICENSE.txt for license information - -"""The KRPC communication protocol implementation. - -@var KRPC_TIMEOUT: the number of seconds after which requests timeout -@var UDP_PACKET_LIMIT: the maximum number of bytes that can be sent in a - UDP packet without fragmentation - -@var KRPC_ERROR: the code for a generic error -@var KRPC_ERROR_SERVER_ERROR: the code for a server error -@var KRPC_ERROR_MALFORMED_PACKET: the code for a malformed packet error -@var KRPC_ERROR_METHOD_UNKNOWN: the code for a method unknown error -@var KRPC_ERROR_MALFORMED_REQUEST: the code for a malformed request error -@var KRPC_ERROR_INVALID_TOKEN: the code for an invalid token error -@var KRPC_ERROR_RESPONSE_TOO_LONG: the code for a response too long error - -@var KRPC_ERROR_INTERNAL: the code for an internal error -@var KRPC_ERROR_RECEIVED_UNKNOWN: the code for an unknown message type error -@var KRPC_ERROR_TIMEOUT: the code for a timeout error -@var KRPC_ERROR_PROTOCOL_STOPPED: the code for a stopped protocol error - -@var TID: the identifier for the transaction ID -@var REQ: the identifier for a request packet -@var RSP: the identifier for a response packet -@var TYP: the identifier for the type of packet -@var ARG: the identifier for the argument to the request -@var ERR: the identifier for an error packet - -@group Remote node error codes: KRPC_ERROR, KRPC_ERROR_SERVER_ERROR, - KRPC_ERROR_MALFORMED_PACKET, KRPC_ERROR_METHOD_UNKNOWN, - KRPC_ERROR_MALFORMED_REQUEST, KRPC_ERROR_INVALID_TOKEN, - KRPC_ERROR_RESPONSE_TOO_LONG -@group Local node error codes: KRPC_ERROR_INTERNAL, KRPC_ERROR_RECEIVED_UNKNOWN, - KRPC_ERROR_TIMEOUT, KRPC_ERROR_PROTOCOL_STOPPED -@group Command identifiers: TID, REQ, RSP, TYP, ARG, ERR - -""" - -from bencode import bencode, bdecode -from time import asctime -from math import ceil - -from twisted.internet.defer import Deferred -from twisted.internet import protocol, reactor -from twisted.python import log -from twisted.trial import unittest - -from khash import newID - -KRPC_TIMEOUT = 20 -UDP_PACKET_LIMIT = 1472 - -# Remote node errors -KRPC_ERROR = 200 -KRPC_ERROR_SERVER_ERROR = 201 -KRPC_ERROR_MALFORMED_PACKET = 202 -KRPC_ERROR_METHOD_UNKNOWN = 203 -KRPC_ERROR_MALFORMED_REQUEST = 204 -KRPC_ERROR_INVALID_TOKEN = 205 -KRPC_ERROR_RESPONSE_TOO_LONG = 206 - -# Local errors -KRPC_ERROR_INTERNAL = 100 -KRPC_ERROR_RECEIVED_UNKNOWN = 101 -KRPC_ERROR_TIMEOUT = 102 -KRPC_ERROR_PROTOCOL_STOPPED = 103 - -# commands -TID = 't' -REQ = 'q' -RSP = 'r' -TYP = 'y' -ARG = 'a' -ERR = 'e' - -class KrpcError(Exception): - """An error occurred in the KRPC protocol.""" - pass - -def verifyMessage(msg): - """Check received message for corruption and errors. 
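
As a point of reference, the single-character identifiers above become the keys of the bencoded message dictionary, which is what keeps packets small. A sketch of what a ping exchange would look like as Python dictionaries (the transaction and node ID strings are illustrative, and the encoded form shown assumes standard bencoding with sorted keys):

    TID, REQ, RSP, TYP, ARG = 't', 'q', 'r', 'y', 'a'

    # A 'ping' request, as the dictionary that gets bencoded and sent:
    request = {TID: 'aa', TYP: REQ, REQ: 'ping',
               ARG: {'id': 'abcdefghij0123456789'}}
    # bencode(request) would yield something like:
    #   d1:ad2:id20:abcdefghij0123456789e1:q4:ping1:t2:aa1:y1:qe

    # The response echoes the transaction ID so it can be matched
    # to the deferred waiting in the sender's tids dictionary:
    response = {TID: 'aa', TYP: RSP, RSP: {'id': 'mnopqrstuvwxyz123456'}}
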
- - @type msg: C{dictionary} - @param msg: the dictionary of information received on the connection - @raise KrpcError: if the message is corrupt - """ - - if type(msg) != dict: - raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "not a dictionary") - if TYP not in msg: - raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "no message type") - if msg[TYP] == REQ: - if REQ not in msg: - raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "request type not specified") - if type(msg[REQ]) != str: - raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "request type is not a string") - if ARG not in msg: - raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "no arguments for request") - if type(msg[ARG]) != dict: - raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "arguments for request are not in a dictionary") - elif msg[TYP] == RSP: - if RSP not in msg: - raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "response not specified") - if type(msg[RSP]) != dict: - raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "response is not a dictionary") - elif msg[TYP] == ERR: - if ERR not in msg: - raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "error not specified") - if type(msg[ERR]) != list: - raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "error is not a list") - if len(msg[ERR]) != 2: - raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "error is not a 2-element list") - if type(msg[ERR][0]) not in (int, long): - raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "error number is not a number") - if type(msg[ERR][1]) != str: - raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "error string is not a string") -# else: -# raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "unknown message type") - if TID not in msg: - raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "no transaction ID specified") - if type(msg[TID]) != str: - raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "transaction id is not a string") - -class hostbroker(protocol.DatagramProtocol): - """The factory for the KRPC protocol. - - @type server: L{khashmir.Khashmir} - @ivar server: the main Khashmir program - @type config: C{dictionary} - @ivar config: the configuration parameters for the DHT - @type connections: C{dictionary} - @ivar connections: all the connections that have ever been made to the - protocol, keys are IP address and port pairs, values are L{KRPC} - protocols for the addresses - @ivar protocol: the protocol to use to handle incoming connections - (added externally) - @type addr: (C{string}, C{int}) - @ivar addr: the IP address and port of this node - """ - - def __init__(self, server, config): - """Initialize the factory. - - @type server: L{khashmir.Khashmir} - @param server: the main DHT program - @type config: C{dictionary} - @param config: the configuration parameters for the DHT - """ - self.server = server - self.config = config - # this should be changed to storage that drops old entries - self.connections = {} - - def datagramReceived(self, datagram, addr): - """Optionally create a new protocol object, and handle the new datagram. - - @type datagram: C{string} - @param datagram: the data received from the transport. - @type addr: (C{string}, C{int}) - @param addr: source IP address and port of datagram. - """ - c = self.connectionForAddr(addr) - c.datagramReceived(datagram, addr) - #if c.idle(): - # del self.connections[addr] - - def connectionForAddr(self, addr): - """Get a protocol object for the source. - - @type addr: (C{string}, C{int}) - @param addr: source IP address and port of datagram. 
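
The hostbroker above demultiplexes a single UDP socket into one protocol object per peer. That pattern, reduced to a standalone sketch (the class and factory names here are illustrative, not part of the package):

    from twisted.internet import protocol

    class Demux(protocol.DatagramProtocol):
        """One UDP port, one lazily-created handler per remote (host, port)."""

        def __init__(self, make_handler):
            self.make_handler = make_handler
            self.handlers = {}

        def datagramReceived(self, datagram, addr):
            # Reuse the peer's handler so per-peer state (such as
            # outstanding transaction IDs) survives between packets.
            if addr not in self.handlers:
                self.handlers[addr] = self.make_handler(addr)
            self.handlers[addr].datagramReceived(datagram, addr)

As the comment in the original __init__ concedes, such a cache grows without bound; expiring idle entries would be the natural refinement.
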
- """ - # Don't connect to ourself - if addr == self.addr: - raise KrcpError - - # Create a new protocol object if necessary - if not self.connections.has_key(addr): - conn = self.protocol(addr, self.server, self.transport, self.config['SPEW']) - self.connections[addr] = conn - else: - conn = self.connections[addr] - return conn - - def makeConnection(self, transport): - """Make a connection to a transport and save our address.""" - protocol.DatagramProtocol.makeConnection(self, transport) - tup = transport.getHost() - self.addr = (tup.host, tup.port) - - def stopProtocol(self): - """Stop all the open connections.""" - for conn in self.connections.values(): - conn.stop() - protocol.DatagramProtocol.stopProtocol(self) - -class KRPC: - """The KRPC protocol implementation. - - @ivar transport: the transport to use for the protocol - @type factory: L{khashmir.Khashmir} - @ivar factory: the main Khashmir program - @type addr: (C{string}, C{int}) - @ivar addr: the IP address and port of the source node - @type noisy: C{boolean} - @ivar noisy: whether to log additional details of the protocol - @type tids: C{dictionary} - @ivar tids: the transaction IDs outstanding for requests, keys are the - transaction ID of the request, values are the deferreds to call with - the results - @type stopped: C{boolean} - @ivar stopped: whether the protocol has been stopped - """ - - def __init__(self, addr, server, transport, spew = False): - """Initialize the protocol. - - @type addr: (C{string}, C{int}) - @param addr: the IP address and port of the source node - @type server: L{khashmir.Khashmir} - @param server: the main Khashmir program - @param transport: the transport to use for the protocol - @type spew: C{boolean} - @param spew: whether to log additional details of the protocol - (optional, defaults to False) - """ - self.transport = transport - self.factory = server - self.addr = addr - self.noisy = spew - self.tids = {} - self.stopped = False - - def datagramReceived(self, data, addr): - """Process the new datagram. - - @type data: C{string} - @param data: the data received from the transport. - @type addr: (C{string}, C{int}) - @param addr: source IP address and port of datagram. 
- """ - if self.stopped: - if self.noisy: - log.msg("stopped, dropping message from %r: %s" % (addr, data)) - - # Bdecode the message - try: - msg = bdecode(data) - except Exception, e: - if self.noisy: - log.msg("krpc bdecode error: ") - log.err(e) - return - - # Make sure the remote node isn't trying anything funny - try: - verifyMessage(msg) - except Exception, e: - log.msg("krpc message verification error: ") - log.err(e) - return - - if self.noisy: - log.msg("%d received from %r: %s" % (self.factory.port, addr, msg)) - - # Process it based on its type - if msg[TYP] == REQ: - ilen = len(data) - - # Requests are handled by the factory - f = getattr(self.factory ,"krpc_" + msg[REQ], None) - msg[ARG]['_krpc_sender'] = self.addr - if f and callable(f): - try: - ret = f(*(), **msg[ARG]) - except KrpcError, e: - log.msg('Got a Krpc error while running: krpc_%s' % msg[REQ]) - log.err(e) - olen = self._sendResponse(addr, msg[TID], ERR, [e[0], e[1]]) - except TypeError, e: - log.msg('Got a malformed request for: krpc_%s' % msg[REQ]) - log.err(e) - olen = self._sendResponse(addr, msg[TID], ERR, - [KRPC_ERROR_MALFORMED_REQUEST, str(e)]) - except Exception, e: - log.msg('Got an unknown error while running: krpc_%s' % msg[REQ]) - log.err(e) - olen = self._sendResponse(addr, msg[TID], ERR, - [KRPC_ERROR_SERVER_ERROR, str(e)]) - else: - olen = self._sendResponse(addr, msg[TID], RSP, ret) - else: - # Request for unknown method - log.msg("ERROR: don't know about method %s" % msg[REQ]) - olen = self._sendResponse(addr, msg[TID], ERR, - [KRPC_ERROR_METHOD_UNKNOWN, "unknown method "+str(msg[REQ])]) - if self.noisy: - log.msg("%s >>> %s - %s %s %s" % (addr, self.factory.node.port, - ilen, msg[REQ], olen)) - elif msg[TYP] == RSP: - # Responses get processed by their TID's deferred - if self.tids.has_key(msg[TID]): - df = self.tids[msg[TID]] - # callback - del(self.tids[msg[TID]]) - df.callback({'rsp' : msg[RSP], '_krpc_sender': addr}) - else: - # no tid, this transaction timed out already... - if self.noisy: - log.msg('timeout: %r' % msg[RSP]['id']) - elif msg[TYP] == ERR: - # Errors get processed by their TID's deferred's errback - if self.tids.has_key(msg[TID]): - df = self.tids[msg[TID]] - del(self.tids[msg[TID]]) - # callback - df.errback(KrpcError(*msg[ERR])) - else: - # day late and dollar short, just log it - log.msg("Got an error for an unknown request: %r" % (msg[ERR], )) - pass - else: - # Received an unknown message type - if self.noisy: - log.msg("unknown message type: %r" % msg) - if msg[TID] in self.tids: - df = self.tids[msg[TID]] - del(self.tids[msg[TID]]) - # callback - df.errback(KrpcError(KRPC_ERROR_RECEIVED_UNKNOWN, - "Received an unknown message type: %r" % msg[TYP])) - - def _sendResponse(self, addr, tid, msgType, response): - """Helper function for sending responses to nodes. - - @type addr: (C{string}, C{int}) - @param addr: source IP address and port of datagram. - @param tid: the transaction ID of the request - @param msgType: the type of message to respond with - @param response: the arguments for the response - """ - if not response: - response = {} - - try: - # Create the response message - msg = {TID : tid, TYP : msgType, msgType : response} - - if self.noisy: - log.msg("%d responding to %r: %s" % (self.factory.port, addr, msg)) - - out = bencode(msg) - - # Make sure its not too long - if len(out) > UDP_PACKET_LIMIT: - # Can we remove some values to shorten it? 
-                if 'values' in response:
-                    # Save the original list of values
-                    orig_values = response['values']
-                    len_orig_values = len(bencode(orig_values))
-
-                    # Calculate the maximum value length possible
-                    max_len_values = len_orig_values - (len(out) - UDP_PACKET_LIMIT)
-                    assert max_len_values > 0
-
-                    # Start with a calculation of how many values should be included
-                    # (assumes all values are the same length)
-                    per_value = (float(len_orig_values) - 2.0) / float(len(orig_values))
-                    num_values = len(orig_values) - int(ceil(float(len(out) - UDP_PACKET_LIMIT) / per_value))
-
-                    # Do a linear search for the actual maximum number possible
-                    bencoded_values = len(bencode(orig_values[:num_values]))
-                    while bencoded_values < max_len_values and num_values + 1 < len(orig_values):
-                        bencoded_values += len(bencode(orig_values[num_values]))
-                        num_values += 1
-                    while bencoded_values > max_len_values and num_values > 0:
-                        num_values -= 1
-                        bencoded_values -= len(bencode(orig_values[num_values]))
-                    assert num_values > 0
-
-                    # Encode the result
-                    response['values'] = orig_values[:num_values]
-                    out = bencode(msg)
-                    assert len(out) < UDP_PACKET_LIMIT
-                    log.msg('Shortened a long packet from %d to %d values, new packet length: %d' %
-                            (len(orig_values), num_values, len(out)))
-                else:
-                    # Too long a response, send an error
-                    log.msg('Could not send response, too long: %d bytes' % len(out))
-                    msg = {TID : tid, TYP : ERR, ERR : [KRPC_ERROR_RESPONSE_TOO_LONG, "response was %d bytes" % len(out)]}
-                    out = bencode(msg)
-
-        except Exception, e:
-            # Unknown error, send an error message
-            msg = {TID : tid, TYP : ERR, ERR : [KRPC_ERROR_SERVER_ERROR, "unknown error sending response: %s" % str(e)]}
-            out = bencode(msg)
-
-        self.transport.write(out, addr)
-        return len(out)
-
-    def sendRequest(self, method, args):
-        """Send a request to the remote node.
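
The shortening arithmetic above can be checked in isolation. A standalone sketch with a toy bencoder for string lists (the real code uses the package's own bencode module; the 40 bytes of surrounding message overhead is an illustrative stand-in):

    from math import ceil

    UDP_PACKET_LIMIT = 1472

    def bencode_list(values):
        # Toy bencoder: 'l' + '<length>:<bytes>' for each value + 'e'
        return 'l' + ''.join('%d:%s' % (len(v), v) for v in values) + 'e'

    values = ['x' * 20] * 200                    # far too many for one packet
    out_len = len(bencode_list(values)) + 40     # whole message, too long

    # First guess, assuming every value encodes to the same length:
    per_value = (float(len(bencode_list(values))) - 2.0) / len(values)
    num = len(values) - int(ceil((out_len - UDP_PACKET_LIMIT) / per_value))

    # The linear search then nudges num up or down until the encoded
    # prefix just fits; with equal-sized values the guess is already exact.
    assert len(bencode_list(values[:num])) + 40 <= UDP_PACKET_LIMIT
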
- - @type method: C{string} - @param method: the methiod name to call on the remote node - @param args: the arguments to send to the remote node's method - """ - if self.stopped: - raise KrpcError, (KRPC_ERROR_PROTOCOL_STOPPED, "cannot send, connection has been stopped") - - # Create the request message - msg = {TID : newID(), TYP : REQ, REQ : method, ARG : args} - if self.noisy: - log.msg("%d sending to %r: %s" % (self.factory.port, self.addr, msg)) - data = bencode(msg) - - # Create the deferred and save it with the TID - d = Deferred() - self.tids[msg[TID]] = d - - # Schedule a later timeout call - def timeOut(tids = self.tids, id = msg[TID], method = method, addr = self.addr): - """Call the deferred's errback if a timeout occurs.""" - if tids.has_key(id): - df = tids[id] - del(tids[id]) - df.errback(KrpcError(KRPC_ERROR_TIMEOUT, "timeout waiting for '%s' from %r" % (method, addr))) - later = reactor.callLater(KRPC_TIMEOUT, timeOut) - - # Cancel the timeout call if a response is received - def dropTimeOut(dict, later_call = later): - """Cancel the timeout call when a response is received.""" - if later_call.active(): - later_call.cancel() - return dict - d.addBoth(dropTimeOut) - - self.transport.write(data, self.addr) - return d - - def stop(self): - """Timeout all pending requests.""" - for df in self.tids.values(): - df.errback(KrpcError(KRPC_ERROR_PROTOCOL_STOPPED, 'connection has been stopped while waiting for response')) - self.tids = {} - self.stopped = True - -#{ For testing the KRPC protocol -def connectionForAddr(host, port): - return host - -class Receiver(protocol.Factory): - protocol = KRPC - def __init__(self): - self.buf = [] - def krpc_store(self, msg, _krpc_sender): - self.buf += [msg] - return {} - def krpc_echo(self, msg, _krpc_sender): - return {'msg': msg} - def krpc_values(self, length, num, _krpc_sender): - return {'values': ['1'*length]*num} - -def make(port): - af = Receiver() - a = hostbroker(af, {'SPEW': False}) - a.protocol = KRPC - p = reactor.listenUDP(port, a) - return af, a, p - -class KRPCTests(unittest.TestCase): - timeout = 2 - - def setUp(self): - self.af, self.a, self.ap = make(1180) - self.bf, self.b, self.bp = make(1181) - - def tearDown(self): - self.ap.stopListening() - self.bp.stopListening() - - def bufEquals(self, result, value): - self.failUnlessEqual(self.bf.buf, value) - - def testSimpleMessage(self): - d = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('store', {'msg' : "This is a test."}) - d.addCallback(self.bufEquals, ["This is a test."]) - return d - - def testMessageBlast(self): - for i in range(100): - d = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('store', {'msg' : "This is a test."}) - d.addCallback(self.bufEquals, ["This is a test."] * 100) - return d - - def testEcho(self): - df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is a test."}) - df.addCallback(self.gotMsg, "This is a test.") - return df - - def gotMsg(self, dict, should_be): - _krpc_sender = dict['_krpc_sender'] - msg = dict['rsp'] - self.failUnlessEqual(msg['msg'], should_be) - - def testManyEcho(self): - for i in xrange(100): - df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is a test."}) - df.addCallback(self.gotMsg, "This is a test.") - return df - - def testMultiEcho(self): - df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is a test."}) - df.addCallback(self.gotMsg, "This is a test.") - - df = 
self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is another test."}) - df.addCallback(self.gotMsg, "This is another test.") - - df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is yet another test."}) - df.addCallback(self.gotMsg, "This is yet another test.") - - return df - - def testEchoReset(self): - df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is a test."}) - df.addCallback(self.gotMsg, "This is a test.") - - df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is another test."}) - df.addCallback(self.gotMsg, "This is another test.") - df.addCallback(self.echoReset) - return df - - def echoReset(self, dict): - del(self.a.connections[('127.0.0.1', 1181)]) - df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is yet another test."}) - df.addCallback(self.gotMsg, "This is yet another test.") - return df - - def testUnknownMeth(self): - df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('blahblah', {'msg' : "This is a test."}) - df.addBoth(self.gotErr, KRPC_ERROR_METHOD_UNKNOWN) - return df - - def testMalformedRequest(self): - df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is a test.", 'foo': 'bar'}) - df.addBoth(self.gotErr, KRPC_ERROR_MALFORMED_REQUEST) - return df - - def gotErr(self, err, should_be): - self.failUnlessEqual(err.value[0], should_be) - - def testLongPackets(self): - df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('values', {'length' : 1, 'num': 2000}) - df.addCallback(self.gotLongRsp) - return df - - def gotLongRsp(self, dict): - # Not quite accurate, but good enough - self.failUnless(len(bencode(dict))-10 < UDP_PACKET_LIMIT) - \ No newline at end of file diff --git a/apt_dht_Khashmir/ktable.py b/apt_dht_Khashmir/ktable.py deleted file mode 100644 index fb0c371..0000000 --- a/apt_dht_Khashmir/ktable.py +++ /dev/null @@ -1,335 +0,0 @@ -## Copyright 2002-2003 Andrew Loewenstern, All Rights Reserved -# see LICENSE.txt for license information - -"""The routing table and buckets for a kademlia-like DHT.""" - -from datetime import datetime -from bisect import bisect_left - -from twisted.python import log -from twisted.trial import unittest - -import khash -from node import Node, NULL_ID - -class KTable: - """Local routing table for a kademlia-like distributed hash table. - - @type node: L{node.Node} - @ivar node: the local node - @type config: C{dictionary} - @ivar config: the configuration parameters for the DHT - @type buckets: C{list} of L{KBucket} - @ivar buckets: the buckets of nodes in the routing table - """ - - def __init__(self, node, config): - """Initialize the first empty bucket of everything. - - @type node: L{node.Node} - @param node: the local node - @type config: C{dictionary} - @param config: the configuration parameters for the DHT - """ - # this is the root node, a.k.a. US! - assert node.id != NULL_ID - self.node = node - self.config = config - self.buckets = [KBucket([], 0L, 2L**self.config['HASH_LENGTH'])] - - def _bucketIndexForInt(self, num): - """Find the index of the bucket that should hold the node's ID number.""" - return bisect_left(self.buckets, num) - - def findNodes(self, id): - """Find the K nodes in our own local table closest to the ID. 
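
The sort at the end of findNodes (below) is the Kademlia metric itself: the distance between two IDs is their XOR, compared as an integer. A standalone sketch with small illustrative IDs:

    K = 8
    ids = [0x1234, 0xBEEF, 0x0FF0, 0x1010, 0x8888,
           0xFFFF, 0x0001, 0x1235, 0x4321, 0x1300]
    target = 0x1200

    # Sort by XOR distance to the target and keep the closest K.
    closest = sorted(ids, key=lambda n: n ^ target)[:K]

    # 0x1234 ^ 0x1200 == 0x34, the smallest distance here, so it sorts first.
    assert closest[0] == 0x1234

The cmp-based lambda in the original computes the same ordering; the key-based form above is just the more common spelling.
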
- - @type id: C{string} of C{int} or L{node.Node} - @param id: the ID to find nodes that are close to - @raise TypeError: if id does not properly identify an ID - """ - - # Get the ID number from the input - if isinstance(id, str): - num = khash.intify(id) - elif isinstance(id, Node): - num = id.num - elif isinstance(id, int) or isinstance(id, long): - num = id - else: - raise TypeError, "findNodes requires an int, string, or Node" - - nodes = [] - i = self._bucketIndexForInt(num) - - # If this node is already in our table then return it - try: - index = self.buckets[i].l.index(num) - except ValueError: - pass - else: - return [self.buckets[i].l[index]] - - # Don't have the node, get the K closest nodes from the appropriate bucket - nodes = nodes + self.buckets[i].l - - # Make sure we have enough - if len(nodes) < self.config['K']: - # Look in adjoining buckets for nodes - min = i - 1 - max = i + 1 - while len(nodes) < self.config['K'] and (min >= 0 or max < len(self.buckets)): - # Add the adjoining buckets' nodes to the list - if min >= 0: - nodes = nodes + self.buckets[min].l - if max < len(self.buckets): - nodes = nodes + self.buckets[max].l - min = min - 1 - max = max + 1 - - # Sort the found nodes by proximity to the id and return the closest K - nodes.sort(lambda a, b, num=num: cmp(num ^ a.num, num ^ b.num)) - return nodes[:self.config['K']] - - def _splitBucket(self, a): - """Split a bucket in two. - - @type a: L{KBucket} - @param a: the bucket to split - """ - # Create a new bucket with half the (upper) range of the current bucket - diff = (a.max - a.min) / 2 - b = KBucket([], a.max - diff, a.max) - self.buckets.insert(self.buckets.index(a.min) + 1, b) - - # Reduce the input bucket's (upper) range - a.max = a.max - diff - - # Transfer nodes to the new bucket - for anode in a.l[:]: - if anode.num >= a.max: - a.l.remove(anode) - b.l.append(anode) - - def replaceStaleNode(self, stale, new = None): - """Replace a stale node in a bucket with a new one. - - This is used by clients to replace a node returned by insertNode after - it fails to respond to a ping. - - @type stale: L{node.Node} - @param stale: the stale node to remove from the bucket - @type new: L{node.Node} - @param new: the new node to add in it's place (optional, defaults to - not adding any node in the old node's place) - """ - # Find the stale node's bucket - i = self._bucketIndexForInt(stale.num) - try: - it = self.buckets[i].l.index(stale.num) - except ValueError: - return - - # Remove the stale node and insert the new one - del(self.buckets[i].l[it]) - if new: - self.buckets[i].l.append(new) - - def insertNode(self, node, contacted = True): - """Try to insert a node in the routing table. - - This inserts the node, returning None if successful, otherwise returns - the oldest node in the bucket if it's full. The caller is then - responsible for pinging the returned node and calling replaceStaleNode - if it doesn't respond. contacted means that yes, we contacted THEM and - we know the node is reachable. - - @type node: L{node.Node} - @param node: the new node to try and insert - @type contacted: C{boolean} - @param contacted: whether the new node is known to be good, i.e. - responded to a request (optional, defaults to True) - @rtype: L{node.Node} - @return: None if successful (the bucket wasn't full), otherwise returns the oldest node in the bucket - """ - assert node.id != NULL_ID - if node.id == self.node.id: return - - # Get the bucket for this node - i = self. 
_bucketIndexForInt(node.num) - - # Check to see if node is in the bucket already - try: - it = self.buckets[i].l.index(node.num) - except ValueError: - pass - else: - # The node is already in the bucket - if contacted: - # It responded, so update it - node.updateLastSeen() - # move node to end of bucket - xnode = self.buckets[i].l[it] - del(self.buckets[i].l[it]) - # note that we removed the original and replaced it with the new one - # utilizing this nodes new contact info - self.buckets[i].l.append(xnode) - self.buckets[i].touch() - return - - # We don't have this node, check to see if the bucket is full - if len(self.buckets[i].l) < self.config['K']: - # Not full, append this node and return - if contacted: - node.updateLastSeen() - self.buckets[i].l.append(node) - self.buckets[i].touch() - return - - # Bucket is full, check to see if the local node is not in the bucket - if not (self.buckets[i].min <= self.node < self.buckets[i].max): - # Local node not in the bucket, can't split it, return the oldest node - return self.buckets[i].l[0] - - # Make sure our table isn't FULL, this is really unlikely - if len(self.buckets) >= self.config['HASH_LENGTH']: - log.err("Hash Table is FULL! Increase K!") - return - - # This bucket is full and contains our node, split the bucket - self._splitBucket(self.buckets[i]) - - # Now that the bucket is split and balanced, try to insert the node again - return self.insertNode(node) - - def justSeenNode(self, id): - """Mark a node as just having been seen. - - Call this any time you get a message from a node, it will update it - in the table if it's there. - - @type id: C{string} of C{int} or L{node.Node} - @param id: the node ID to mark as just having been seen - @rtype: C{datetime.datetime} - @return: the old lastSeen time of the node, or None if it's not in the table - """ - try: - n = self.findNodes(id)[0] - except IndexError: - return None - else: - tstamp = n.lastSeen - n.updateLastSeen() - return tstamp - - def invalidateNode(self, n): - """Remove the node from the routing table. - - Forget about node n. Use this when you know that a node is invalid. - """ - self.replaceStaleNode(n) - - def nodeFailed(self, node): - """Mark a node as having failed once, and remove it if it has failed too much.""" - try: - n = self.findNodes(node.num)[0] - except IndexError: - return None - else: - if n.msgFailed() >= self.config['MAX_FAILURES']: - self.invalidateNode(n) - -class KBucket: - """Single bucket of nodes in a kademlia-like routing table. - - @type l: C{list} of L{node.Node} - @ivar l: the nodes that are in this bucket - @type min: C{long} - @ivar min: the minimum node ID that can be in this bucket - @type max: C{long} - @ivar max: the maximum node ID that can be in this bucket - @type lastAccessed: C{datetime.datetime} - @ivar lastAccessed: the last time a node in this bucket was successfully contacted - """ - - def __init__(self, contents, min, max): - """Initialize the bucket with nodes. - - @type contents: C{list} of L{node.Node} - @param contents: the nodes to store in the bucket - @type min: C{long} - @param min: the minimum node ID that can be in this bucket - @type max: C{long} - @param max: the maximum node ID that can be in this bucket - """ - self.l = contents - self.min = min - self.max = max - self.lastAccessed = datetime.now() - - def touch(self): - """Update the L{lastAccessed} time.""" - self.lastAccessed = datetime.now() - - def getNodeWithInt(self, num): - """Get the node in the bucket with that number. 
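
The bucket split that insertNode falls back on above is easier to see with bare integers. A sketch of _splitBucket's range arithmetic (a stripped-down bucket holding numeric IDs instead of Node objects):

    class Bucket:
        def __init__(self, nodes, min, max):
            self.l, self.min, self.max = nodes, min, max

    def split(a):
        """Halve a bucket's range; a new bucket takes the upper half."""
        mid = a.max - (a.max - a.min) / 2
        b = Bucket([n for n in a.l if n >= mid], mid, a.max)
        a.l = [n for n in a.l if n < mid]
        a.max = mid
        return b

    a = Bucket([1, 6, 9, 14], 0, 16)
    b = split(a)
    assert (a.l, a.min, a.max) == ([1, 6], 0, 8)
    assert (b.l, b.min, b.max) == ([9, 14], 8, 16)
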
- - @type num: C{long} - @param num: the node ID to look for - @raise ValueError: if the node ID is not in the bucket - @rtype: L{node.Node} - @return: the node - """ - if num in self.l: return num - else: raise ValueError - - def __repr__(self): - return "" % (len(self.l), self.min, self.max) - - #{ Comparators to bisect/index a list of buckets (by their range) with either a node or a long - def __lt__(self, a): - if isinstance(a, Node): a = a.num - return self.max <= a - def __le__(self, a): - if isinstance(a, Node): a = a.num - return self.min < a - def __gt__(self, a): - if isinstance(a, Node): a = a.num - return self.min > a - def __ge__(self, a): - if isinstance(a, Node): a = a.num - return self.max >= a - def __eq__(self, a): - if isinstance(a, Node): a = a.num - return self.min <= a and self.max > a - def __ne__(self, a): - if isinstance(a, Node): a = a.num - return self.min >= a or self.max < a - -class TestKTable(unittest.TestCase): - """Unit tests for the routing table.""" - - def setUp(self): - self.a = Node(khash.newID(), '127.0.0.1', 2002) - self.t = KTable(self.a, {'HASH_LENGTH': 160, 'K': 8, 'MAX_FAILURES': 3}) - - def testAddNode(self): - self.b = Node(khash.newID(), '127.0.0.1', 2003) - self.t.insertNode(self.b) - self.failUnlessEqual(len(self.t.buckets[0].l), 1) - self.failUnlessEqual(self.t.buckets[0].l[0], self.b) - - def testRemove(self): - self.testAddNode() - self.t.invalidateNode(self.b) - self.failUnlessEqual(len(self.t.buckets[0].l), 0) - - def testFail(self): - self.testAddNode() - for i in range(self.t.config['MAX_FAILURES'] - 1): - self.t.nodeFailed(self.b) - self.failUnlessEqual(len(self.t.buckets[0].l), 1) - self.failUnlessEqual(self.t.buckets[0].l[0], self.b) - - self.t.nodeFailed(self.b) - self.failUnlessEqual(len(self.t.buckets[0].l), 0) diff --git a/apt_dht_Khashmir/node.py b/apt_dht_Khashmir/node.py deleted file mode 100644 index 49b8fe7..0000000 --- a/apt_dht_Khashmir/node.py +++ /dev/null @@ -1,143 +0,0 @@ -## Copyright 2002-2003 Andrew Loewenstern, All Rights Reserved -# see LICENSE.txt for license information - -"""Represents a node in the DHT. - -@type NULL_ID: C{string} -@var NULL_ID: the node ID to use until one is known -""" - -from datetime import datetime, MINYEAR -from types import InstanceType - -from twisted.trial import unittest - -import khash -from util import compact - -# magic id to use before we know a peer's id -NULL_ID = 20 * '\0' - -class Node: - """Encapsulate a node's contact info. - - @ivar conn: the connection to the remote node (added externally) - @ivar table: the routing table (added externally) - @type fails: C{int} - @ivar fails: number of times this node has failed in a row - @type lastSeen: C{datetime.datetime} - @ivar lastSeen: the last time a response was received from this node - @type id: C{string} - @ivar id: the node's ID in the DHT - @type num: C{long} - @ivar num: the node's ID in number form - @type host: C{string} - @ivar host: the IP address of the node - @type port: C{int} - @ivar port: the port of the node - @type token: C{string} - @ivar token: the last received token from the node - @type num_values: C{int} - @ivar num_values: the number of values the node has for the key in the - currently executing action - """ - - def __init__(self, id, host = None, port = None): - """Initialize the node. 
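
The comparator block on KBucket above is what lets _bucketIndexForInt call bisect directly on the bucket list: a bucket orders against a bare number as its half-open range [min, max). A reduced sketch of why the lookup works (the ranges are illustrative):

    from bisect import bisect_left

    class Range:
        """A half-open range [min, max) that orders against plain numbers."""
        def __init__(self, min, max):
            self.min, self.max = min, max
        def __lt__(self, a):
            return self.max <= a    # the whole range lies below the number

    buckets = [Range(0, 2**80), Range(2**80, 2**159), Range(2**159, 2**160)]

    # bisect_left only needs __lt__, and it lands on the bucket whose
    # range contains the given ID:
    i = bisect_left(buckets, 2**100)
    assert i == 1 and buckets[i].min <= 2**100 < buckets[i].max
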
- - @type id: C{string} or C{dictionary} - @param id: the node's ID in the DHT, or a dictionary containing the - node's id, host and port - @type host: C{string} - @param host: the IP address of the node - (optional, but must be specified if id is not a dictionary) - @type port: C{int} - @param port: the port of the node - (optional, but must be specified if id is not a dictionary) - """ - self.fails = 0 - self.lastSeen = datetime(MINYEAR, 1, 1) - - # Alternate method, init Node from dictionary - if isinstance(id, dict): - host = id['host'] - port = id['port'] - id = id['id'] - - assert isinstance(id, str) - assert isinstance(host, str) - self.id = id - self.num = khash.intify(id) - self.host = host - self.port = int(port) - self.token = '' - self.num_values = 0 - self._contactInfo = None - - def updateLastSeen(self): - """Updates the last contact time of the node and resets the number of failures.""" - self.lastSeen = datetime.now() - self.fails = 0 - - def updateToken(self, token): - """Update the token for the node.""" - self.token = token - - def updateNumValues(self, num_values): - """Update how many values the node has in the current search for a value.""" - self.num_values = num_values - - def msgFailed(self): - """Log a failed attempt to contact this node. - - @rtype: C{int} - @return: the number of consecutive failures this node has - """ - self.fails = self.fails + 1 - return self.fails - - def contactInfo(self): - """Get the compact contact info for the node.""" - if self._contactInfo is None: - self._contactInfo = compact(self.id, self.host, self.port) - return self._contactInfo - - def __repr__(self): - return `(self.id, self.host, self.port)` - - #{ Comparators to bisect/index a list of nodes with either a node or a long - def __lt__(self, a): - if type(a) == InstanceType: - a = a.num - return self.num < a - def __le__(self, a): - if type(a) == InstanceType: - a = a.num - return self.num <= a - def __gt__(self, a): - if type(a) == InstanceType: - a = a.num - return self.num > a - def __ge__(self, a): - if type(a) == InstanceType: - a = a.num - return self.num >= a - def __eq__(self, a): - if type(a) == InstanceType: - a = a.num - return self.num == a - def __ne__(self, a): - if type(a) == InstanceType: - a = a.num - return self.num != a - - -class TestNode(unittest.TestCase): - """Unit tests for the node implementation.""" - def setUp(self): - self.node = Node(khash.newID(), '127.0.0.1', 2002) - def testUpdateLastSeen(self): - t = self.node.lastSeen - self.node.updateLastSeen() - self.failUnless(t < self.node.lastSeen) - \ No newline at end of file diff --git a/apt_dht_Khashmir/util.py b/apt_dht_Khashmir/util.py deleted file mode 100644 index 52b6e97..0000000 --- a/apt_dht_Khashmir/util.py +++ /dev/null @@ -1,78 +0,0 @@ -## Copyright 2002-2003 Andrew Loewenstern, All Rights Reserved -# see LICENSE.txt for license information - -"""Some utitlity functions for use in apt-p2p's khashmir DHT.""" - -from twisted.trial import unittest - -def bucket_stats(l): - """Given a list of khashmir instances, finds min, max, and average number of nodes in tables.""" - max = avg = 0 - min = None - def count(buckets): - c = 0 - for bucket in buckets: - c = c + len(bucket.l) - return c - for node in l: - c = count(node.table.buckets) - if min == None: - min = c - elif c < min: - min = c - if c > max: - max = c - avg = avg + c - avg = avg / len(l) - return {'min':min, 'max':max, 'avg':avg} - -def uncompact(s): - """Extract the contact info from a compact node representation. 
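
The compact contact format referenced here (and implemented just below) packs a peer into exactly 26 bytes: the 20-byte node ID, the 4 IPv4 octets, and a big-endian 2-byte port. A worked round trip with illustrative values:

    node_id = 'N' * 20                                            # 20-byte ID
    ip = ''.join(chr(int(o)) for o in '165.234.1.34'.split('.'))  # 4 bytes
    port = 61234
    packed = node_id + ip + chr((port & 0xFF00) >> 8) + chr(port & 0xFF)
    assert len(packed) == 26

    # And back again, as uncompact does:
    assert '.'.join(str(ord(c)) for c in packed[20:24]) == '165.234.1.34'
    assert (ord(packed[24]) << 8) | ord(packed[25]) == 61234
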
- - @type s: C{string} - @param s: the compact representation - @rtype: C{dictionary} - @return: the node ID, IP address and port to contact the node on - @raise ValueError: if the compact representation doesn't exist - """ - if (len(s) != 26): - raise ValueError - id = s[:20] - host = '.'.join([str(ord(i)) for i in s[20:24]]) - port = (ord(s[24]) << 8) | ord(s[25]) - return {'id': id, 'host': host, 'port': port} - -def compact(id, host, port): - """Create a compact representation of node contact info. - - @type id: C{string} - @param id: the node ID - @type host: C{string} - @param host: the IP address of the node - @type port: C{int} - @param port: the port number to contact the node on - @rtype: C{string} - @return: the compact representation - @raise ValueError: if the compact representation doesn't exist - """ - - s = id + ''.join([chr(int(i)) for i in host.split('.')]) + \ - chr((port & 0xFF00) >> 8) + chr(port & 0xFF) - if len(s) != 26: - raise ValueError - return s - -class TestUtil(unittest.TestCase): - """Tests for the utilities.""" - - timeout = 5 - myid = '\xca\xec\xb8\x0c\x00\xe7\x07\xf8~])\x8f\x9d\xe5_B\xff\x1a\xc4!' - host = '165.234.1.34' - port = 61234 - - def test_compact(self): - d = uncompact(compact(self.myid, self.host, self.port)) - self.failUnlessEqual(d['id'], self.myid) - self.failUnlessEqual(d['host'], self.host) - self.failUnlessEqual(d['port'], self.port) - \ No newline at end of file diff --git a/apt_p2p/AptPackages.py b/apt_p2p/AptPackages.py new file mode 100644 index 0000000..44c84b5 --- /dev/null +++ b/apt_p2p/AptPackages.py @@ -0,0 +1,625 @@ +# +# Copyright (C) 2002 Manuel Estrada Sainz +# Copyright (C) 2008 Cameron Dale +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of version 2.1 of the GNU General Public +# License as published by the Free Software Foundation. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +"""Manage a mirror's index files. + +@type TRACKED_FILES: C{list} of C{string} +@var TRACKED_FILES: the file names of files that contain index information +""" + +# Disable the FutureWarning from the apt module +import warnings +warnings.simplefilter("ignore", FutureWarning) + +import os, shelve +from random import choice +from shutil import rmtree +from copy import deepcopy +from UserDict import DictMixin + +from twisted.internet import threads, defer, reactor +from twisted.python import log +from twisted.python.filepath import FilePath +from twisted.trial import unittest + +import apt_pkg, apt_inst +from apt import OpProgress +from debian_bundle import deb822 + +from Hash import HashObject + +apt_pkg.init() + +TRACKED_FILES = ['release', 'sources', 'packages'] + +class PackageFileList(DictMixin): + """Manages a list of index files belonging to a mirror. 
+ + @type cache_dir: L{twisted.python.filepath.FilePath} + @ivar cache_dir: the directory to use for storing all files + @type packages: C{shelve dictionary} + @ivar packages: the files tracked for this mirror + """ + + def __init__(self, cache_dir): + """Initialize the list by opening the dictionary.""" + self.cache_dir = cache_dir + self.cache_dir.restat(False) + if not self.cache_dir.exists(): + self.cache_dir.makedirs() + self.packages = None + self.open() + + def open(self): + """Open the persistent dictionary of files for this mirror.""" + if self.packages is None: + self.packages = shelve.open(self.cache_dir.child('packages.db').path) + + def close(self): + """Close the persistent dictionary.""" + if self.packages is not None: + self.packages.close() + + def update_file(self, cache_path, file_path): + """Check if an updated file needs to be tracked. + + Called from the mirror manager when files get updated so we can update our + fake lists and sources.list. + + @type cache_path: C{string} + @param cache_path: the location of the file within the mirror + @type file_path: L{twisted.python.filepath.FilePath} + @param file_path: The location of the file in the file system + @rtype: C{boolean} + @return: whether the file is an index file + """ + filename = cache_path.split('/')[-1] + if filename.lower() in TRACKED_FILES: + log.msg("Registering package file: "+cache_path) + self.packages[cache_path] = file_path + return True + return False + + def check_files(self): + """Check all files in the database to remove any that don't exist.""" + files = self.packages.keys() + for f in files: + self.packages[f].restat(False) + if not self.packages[f].exists(): + log.msg("File in packages database has been deleted: "+f) + del self.packages[f] + + #{ Dictionary interface details + def __getitem__(self, key): return self.packages[key] + def __setitem__(self, key, item): self.packages[key] = item + def __delitem__(self, key): del self.packages[key] + def keys(self): return self.packages.keys() + +class AptPackages: + """Answers queries about packages available from a mirror. + + Uses the python-apt tools to parse and provide information about the + files that are available on a single mirror. 
+ + @ivar DEFAULT_APT_CONFIG: the default configuration parameters to use for apt + @ivar essential_dirs: directories that must be created for apt to work + @ivar essential_files: files that must be created for apt to work + @type cache_dir: L{twisted.python.filepath.FilePath} + @ivar cache_dir: the directory to use for storing all files + @type unload_delay: C{int} + @ivar unload_delay: the time to wait before unloading the apt cache + @ivar apt_config: the configuration parameters to use for apt + @type packages: L{PackageFileList} + @ivar packages: the persistent storage of tracked apt index files + @type loaded: C{boolean} + @ivar loaded: whether the apt cache is currently loaded + @type loading: L{twisted.internet.defer.Deferred} + @ivar loading: if the cache is currently being loaded, this will be + called when it is loaded, otherwise it is None + @type unload_later: L{twisted.internet.interfaces.IDelayedCall} + @ivar unload_later: the delayed call to unload the apt cache + @type indexrecords: C{dictionary} + @ivar indexrecords: the hashes of index files for the mirror, keys are + mirror directories, values are dictionaries with keys the path to the + index file in the mirror directory and values are dictionaries with + keys the hash type and values the hash + @type cache: C{apt_pkg.GetCache()} + @ivar cache: the apt cache of the mirror + @type records: C{apt_pkg.GetPkgRecords()} + @ivar records: the apt package records for all binary packages in a mirror + @type srcrecords: C{apt_pkg.GetPkgSrcRecords} + @ivar srcrecords: the apt package records for all source packages in a mirror + """ + + DEFAULT_APT_CONFIG = { + #'APT' : '', + #'APT::Architecture' : 'i386', # Commented so the machine's config will set this + #'APT::Default-Release' : 'unstable', + 'Dir':'.', # / + 'Dir::State' : 'apt/', # var/lib/apt/ + 'Dir::State::Lists': 'lists/', # lists/ + #'Dir::State::cdroms' : 'cdroms.list', + 'Dir::State::userstatus' : 'status.user', + 'Dir::State::status': 'dpkg/status', # '/var/lib/dpkg/status' + 'Dir::Cache' : '.apt/cache/', # var/cache/apt/ + #'Dir::Cache::archives' : 'archives/', + 'Dir::Cache::srcpkgcache' : 'srcpkgcache.bin', + 'Dir::Cache::pkgcache' : 'pkgcache.bin', + 'Dir::Etc' : 'apt/etc/', # etc/apt/ + 'Dir::Etc::sourcelist' : 'sources.list', + 'Dir::Etc::vendorlist' : 'vendors.list', + 'Dir::Etc::vendorparts' : 'vendors.list.d', + #'Dir::Etc::main' : 'apt.conf', + #'Dir::Etc::parts' : 'apt.conf.d', + #'Dir::Etc::preferences' : 'preferences', + 'Dir::Bin' : '', + #'Dir::Bin::methods' : '', #'/usr/lib/apt/methods' + 'Dir::Bin::dpkg' : '/usr/bin/dpkg', + #'DPkg' : '', + #'DPkg::Pre-Install-Pkgs' : '', + #'DPkg::Tools' : '', + #'DPkg::Tools::Options' : '', + #'DPkg::Tools::Options::/usr/bin/apt-listchanges' : '', + #'DPkg::Tools::Options::/usr/bin/apt-listchanges::Version' : '2', + #'DPkg::Post-Invoke' : '', + } + essential_dirs = ('apt', 'apt/cache', 'apt/dpkg', 'apt/etc', 'apt/lists', + 'apt/lists/partial') + essential_files = ('apt/dpkg/status', 'apt/etc/sources.list',) + + def __init__(self, cache_dir, unload_delay): + """Construct a new packages manager. 
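
Everything in DEFAULT_APT_CONFIG above serves one goal: pointing python-apt at a private, disposable state tree so that loading a mirror's indexes never touches the real /var/lib/apt or dpkg status file. Reduced to a sketch (the /tmp path is illustrative; the old-style apt_pkg.Config interface shown is the one this code targets):

    import apt_pkg

    apt_pkg.init()

    # Redirect apt's paths into a throwaway directory before loading caches.
    private = {'Dir': '/tmp/.apt-p2p',
               'Dir::State': 'apt/',
               'Dir::State::status': '/tmp/.apt-p2p/apt/dpkg/status',
               'Dir::Etc': 'apt/etc/'}
    for key, value in private.items():
        apt_pkg.Config[key] = value
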
+ + @param cache_dir: directory to use to store files for this mirror + """ + self.cache_dir = cache_dir + self.unload_delay = unload_delay + self.apt_config = deepcopy(self.DEFAULT_APT_CONFIG) + + # Create the necessary files and directories for apt + for dir in self.essential_dirs: + path = self.cache_dir.preauthChild(dir) + if not path.exists(): + path.makedirs() + for file in self.essential_files: + path = self.cache_dir.preauthChild(file) + if not path.exists(): + path.touch() + + self.apt_config['Dir'] = self.cache_dir.path + self.apt_config['Dir::State::status'] = self.cache_dir.preauthChild(self.apt_config['Dir::State']).preauthChild(self.apt_config['Dir::State::status']).path + self.packages = PackageFileList(cache_dir) + self.loaded = False + self.loading = None + self.unload_later = None + + def __del__(self): + self.cleanup() + + def addRelease(self, cache_path, file_path): + """Add a Release file's info to the list of index files. + + Dirty hack until python-apt supports apt-pkg/indexrecords.h + (see Bug #456141) + """ + self.indexrecords[cache_path] = {} + + read_packages = False + f = file_path.open('r') + + # Use python-debian routines to parse the file for hashes + rel = deb822.Release(f, fields = ['MD5Sum', 'SHA1', 'SHA256']) + for hash_type in rel: + for file in rel[hash_type]: + self.indexrecords[cache_path].setdefault(file['name'], {})[hash_type.upper()] = (file[hash_type], file['size']) + + f.close() + + def file_updated(self, cache_path, file_path): + """A file in the mirror has changed or been added. + + If this affects us, unload our apt database. + @see: L{PackageFileList.update_file} + """ + if self.packages.update_file(cache_path, file_path): + self.unload() + + def load(self): + """Make sure the package cache is initialized and loaded.""" + # Reset the pending unload call + if self.unload_later and self.unload_later.active(): + self.unload_later.reset(self.unload_delay) + else: + self.unload_later = reactor.callLater(self.unload_delay, self.unload) + + # Make sure it's not already being loaded + if self.loading is None: + log.msg('Loading the packages cache') + self.loading = threads.deferToThread(self._load) + self.loading.addCallback(self.doneLoading) + return self.loading + + def doneLoading(self, loadResult): + """Cache is loaded.""" + self.loading = None + # Must pass on the result for the next callback + return loadResult + + def _load(self): + """Regenerates the fake configuration and loads the packages caches.""" + if self.loaded: return True + + # Modify the default configuration to create the fake one. 
+ apt_pkg.InitSystem() + self.cache_dir.preauthChild(self.apt_config['Dir::State'] + ).preauthChild(self.apt_config['Dir::State::Lists']).remove() + self.cache_dir.preauthChild(self.apt_config['Dir::State'] + ).preauthChild(self.apt_config['Dir::State::Lists'] + ).child('partial').makedirs() + sources_file = self.cache_dir.preauthChild(self.apt_config['Dir::Etc'] + ).preauthChild(self.apt_config['Dir::Etc::sourcelist']) + sources = sources_file.open('w') + sources_count = 0 + deb_src_added = False + self.packages.check_files() + self.indexrecords = {} + + # Create an entry in sources.list for each needed index file + for f in self.packages: + # we should probably clear old entries from self.packages and + # take into account the recorded mtime as optimization + file = self.packages[f] + if f.split('/')[-1] == "Release": + self.addRelease(f, file) + fake_uri='http://apt-p2p'+f + fake_dirname = '/'.join(fake_uri.split('/')[:-1]) + if f.endswith('Sources'): + deb_src_added = True + source_line='deb-src '+fake_dirname+'/ /' + else: + source_line='deb '+fake_dirname+'/ /' + listpath = self.cache_dir.preauthChild(self.apt_config['Dir::State'] + ).preauthChild(self.apt_config['Dir::State::Lists'] + ).child(apt_pkg.URItoFileName(fake_uri)) + sources.write(source_line+'\n') + log.msg("Sources line: " + source_line) + sources_count = sources_count + 1 + + if listpath.exists(): + #we should empty the directory instead + listpath.remove() + os.symlink(file.path, listpath.path) + sources.close() + + if sources_count == 0: + log.msg("No Packages files available for %s backend"%(self.cache_dir.path)) + return False + + log.msg("Loading Packages database for "+self.cache_dir.path) + for key, value in self.apt_config.items(): + apt_pkg.Config[key] = value + + self.cache = apt_pkg.GetCache(OpProgress()) + self.records = apt_pkg.GetPkgRecords(self.cache) + if deb_src_added: + self.srcrecords = apt_pkg.GetPkgSrcRecords() + else: + self.srcrecords = None + + self.loaded = True + return True + + def unload(self): + """Tries to make the packages server quit.""" + if self.unload_later and self.unload_later.active(): + self.unload_later.cancel() + self.unload_later = None + if self.loaded: + log.msg('Unloading the packages cache') + # This should save memory + del self.cache + del self.records + del self.srcrecords + del self.indexrecords + self.loaded = False + + def cleanup(self): + """Cleanup and close any loaded caches.""" + self.unload() + if self.unload_later and self.unload_later.active(): + self.unload_later.cancel() + self.packages.close() + + def findHash(self, path): + """Find the hash for a given path in this mirror. + + @type path: C{string} + @param path: the path within the mirror of the file to lookup + @rtype: L{twisted.internet.defer.Deferred} + @return: a deferred so it can make sure the cache is loaded first + """ + d = defer.Deferred() + + deferLoad = self.load() + deferLoad.addCallback(self._findHash, path, d) + deferLoad.addErrback(self._findHash_error, path, d) + + return d + + def _findHash_error(self, failure, path, d): + """An error occurred, return an empty hash.""" + log.msg('An error occurred while looking up a hash for: %s' % path) + log.err(failure) + d.callback(HashObject()) + return failure + + def _findHash(self, loadResult, path, d): + """Search the records for the hash of a path. 
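
Because loading happens in a worker thread, findHash above can only hand back a deferred. A sketch of the calling side (the apt_packages instance name is hypothetical; hexexpected() is the HashObject accessor the tests below rely on):

    def got_hash(h):
        # h is a HashObject; its expected hash is empty if the path is unknown.
        print 'expected hash:', h.hexexpected()

    d = apt_packages.findHash('/dists/stable/main/binary-i386/Packages.bz2')
    d.addCallback(got_hash)
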
+ + @type loadResult: C{boolean} + @param loadResult: whether apt's cache was successfully loaded + @type path: C{string} + @param path: the path within the mirror of the file to lookup + @type d: L{twisted.internet.defer.Deferred} + @param d: the deferred to callback with the result + """ + if not loadResult: + d.callback(HashObject()) + return loadResult + + h = HashObject() + + # First look for the path in the cache of index files + for release in self.indexrecords: + if path.startswith(release[:-7]): + for indexFile in self.indexrecords[release]: + if release[:-7] + indexFile == path: + h.setFromIndexRecord(self.indexrecords[release][indexFile]) + d.callback(h) + return loadResult + + package = path.split('/')[-1].split('_')[0] + + # Check the binary packages + try: + for version in self.cache[package].VersionList: + size = version.Size + for verFile in version.FileList: + if self.records.Lookup(verFile): + if '/' + self.records.FileName == path: + h.setFromPkgRecord(self.records, size) + d.callback(h) + return loadResult + except KeyError: + pass + + # Check the source packages' files + if self.srcrecords: + self.srcrecords.Restart() + if self.srcrecords.Lookup(package): + for f in self.srcrecords.Files: + if path == '/' + f[2]: + h.setFromSrcRecord(f) + d.callback(h) + return loadResult + + d.callback(h) + + # Have to pass the returned loadResult on in case other calls to this function are pending. + return loadResult + +class TestAptPackages(unittest.TestCase): + """Unit tests for the AptPackages cache.""" + + pending_calls = [] + client = None + timeout = 10 + packagesFile = '' + sourcesFile = '' + releaseFile = '' + + def setUp(self): + """Initializes the cache with files found in the traditional apt location.""" + self.client = AptPackages(FilePath('/tmp/.apt-p2p'), 300) + + # Find the largest index files that are for 'main' + self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n') + self.sourcesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').read().rstrip('\n') + + # Find the Release file corresponding to the found Packages file + for f in os.walk('/var/lib/apt/lists').next()[2]: + if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]): + self.releaseFile = f + break + + # Add all the found files to the PackageFileList + self.client.file_updated(self.releaseFile[self.releaseFile.find('_dists_'):].replace('_','/'), + FilePath('/var/lib/apt/lists/' + self.releaseFile)) + self.client.file_updated(self.packagesFile[self.packagesFile.find('_dists_'):].replace('_','/'), + FilePath('/var/lib/apt/lists/' + self.packagesFile)) + self.client.file_updated(self.sourcesFile[self.sourcesFile.find('_dists_'):].replace('_','/'), + FilePath('/var/lib/apt/lists/' + self.sourcesFile)) + + def test_pkg_hash(self): + """Tests loading the binary package records cache.""" + self.client._load() + + self.client.records.Lookup(self.client.cache['dpkg'].VersionList[0].FileList[0]) + + pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' + + '/var/lib/apt/lists/' + self.packagesFile + + ' | grep -E "^SHA1:" | head -n 1' + + ' | cut -d\ -f 2').read().rstrip('\n') + + self.failUnless(self.client.records.SHA1Hash == pkg_hash, + "Hashes don't match: %s != %s" % (self.client.records.SHA1Hash, pkg_hash)) + + def test_src_hash(self): + """Tests loading the source package records cache.""" + self.client._load() + + self.client.srcrecords.Lookup('dpkg') + + src_hashes = os.popen('grep -A 20 -E 
"^Package: dpkg$" ' + + '/var/lib/apt/lists/' + self.sourcesFile + + ' | grep -A 4 -E "^Files:" | grep -E "^ " ' + + ' | cut -d\ -f 2').read().split('\n')[:-1] + + for f in self.client.srcrecords.Files: + self.failUnless(f[0] in src_hashes, "Couldn't find %s in: %r" % (f[0], src_hashes)) + + def test_index_hash(self): + """Tests loading the cache of index file information.""" + self.client._load() + + indexhash = self.client.indexrecords[self.releaseFile[self.releaseFile.find('_dists_'):].replace('_','/')]['main/binary-i386/Packages.bz2']['SHA1'][0] + + idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' + + '/var/lib/apt/lists/' + self.releaseFile + + ' | grep -E " main/binary-i386/Packages.bz2$"' + ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n') + + self.failUnless(indexhash == idx_hash, "Hashes don't match: %s != %s" % (indexhash, idx_hash)) + + def verifyHash(self, found_hash, path, true_hash): + self.failUnless(found_hash.hexexpected() == true_hash, + "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash)) + + def test_findIndexHash(self): + """Tests finding the hash of a single index file.""" + lastDefer = defer.Deferred() + + idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' + + '/var/lib/apt/lists/' + self.releaseFile + + ' | grep -E " main/binary-i386/Packages.bz2$"' + ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n') + idx_path = '/' + self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2' + + d = self.client.findHash(idx_path) + d.addCallback(self.verifyHash, idx_path, idx_hash) + + d.addBoth(lastDefer.callback) + return lastDefer + + def test_findPkgHash(self): + """Tests finding the hash of a single binary package.""" + lastDefer = defer.Deferred() + + pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' + + '/var/lib/apt/lists/' + self.packagesFile + + ' | grep -E "^SHA1:" | head -n 1' + + ' | cut -d\ -f 2').read().rstrip('\n') + pkg_path = '/' + os.popen('grep -A 30 -E "^Package: dpkg$" ' + + '/var/lib/apt/lists/' + self.packagesFile + + ' | grep -E "^Filename:" | head -n 1' + + ' | cut -d\ -f 2').read().rstrip('\n') + + d = self.client.findHash(pkg_path) + d.addCallback(self.verifyHash, pkg_path, pkg_hash) + + d.addBoth(lastDefer.callback) + return lastDefer + + def test_findSrcHash(self): + """Tests finding the hash of a single source package.""" + lastDefer = defer.Deferred() + + src_dir = '/' + os.popen('grep -A 30 -E "^Package: dpkg$" ' + + '/var/lib/apt/lists/' + self.sourcesFile + + ' | grep -E "^Directory:" | head -n 1' + + ' | cut -d\ -f 2').read().rstrip('\n') + src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' + + '/var/lib/apt/lists/' + self.sourcesFile + + ' | grep -A 4 -E "^Files:" | grep -E "^ " ' + + ' | cut -d\ -f 2').read().split('\n')[:-1] + src_paths = os.popen('grep -A 20 -E "^Package: dpkg$" ' + + '/var/lib/apt/lists/' + self.sourcesFile + + ' | grep -A 4 -E "^Files:" | grep -E "^ " ' + + ' | cut -d\ -f 4').read().split('\n')[:-1] + + i = choice(range(len(src_hashes))) + d = self.client.findHash(src_dir + '/' + src_paths[i]) + d.addCallback(self.verifyHash, src_dir + '/' + src_paths[i], src_hashes[i]) + + d.addBoth(lastDefer.callback) + return lastDefer + + def test_multipleFindHash(self): + """Tests finding the hash of an index file, binary package, source package, and another index file.""" + lastDefer = defer.Deferred() + + # Lookup a Packages.bz2 file + idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' + + '/var/lib/apt/lists/' + self.releaseFile + + ' | grep -E " 
main/binary-i386/Packages.bz2$"' + ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n') + idx_path = '/' + self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2' + + d = self.client.findHash(idx_path) + d.addCallback(self.verifyHash, idx_path, idx_hash) + + # Lookup the binary 'dpkg' package + pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' + + '/var/lib/apt/lists/' + self.packagesFile + + ' | grep -E "^SHA1:" | head -n 1' + + ' | cut -d\ -f 2').read().rstrip('\n') + pkg_path = '/' + os.popen('grep -A 30 -E "^Package: dpkg$" ' + + '/var/lib/apt/lists/' + self.packagesFile + + ' | grep -E "^Filename:" | head -n 1' + + ' | cut -d\ -f 2').read().rstrip('\n') + + d = self.client.findHash(pkg_path) + d.addCallback(self.verifyHash, pkg_path, pkg_hash) + + # Lookup the source 'dpkg' package + src_dir = '/' + os.popen('grep -A 30 -E "^Package: dpkg$" ' + + '/var/lib/apt/lists/' + self.sourcesFile + + ' | grep -E "^Directory:" | head -n 1' + + ' | cut -d\ -f 2').read().rstrip('\n') + src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' + + '/var/lib/apt/lists/' + self.sourcesFile + + ' | grep -A 4 -E "^Files:" | grep -E "^ " ' + + ' | cut -d\ -f 2').read().split('\n')[:-1] + src_paths = os.popen('grep -A 20 -E "^Package: dpkg$" ' + + '/var/lib/apt/lists/' + self.sourcesFile + + ' | grep -A 4 -E "^Files:" | grep -E "^ " ' + + ' | cut -d\ -f 4').read().split('\n')[:-1] + + for i in range(len(src_hashes)): + d = self.client.findHash(src_dir + '/' + src_paths[i]) + d.addCallback(self.verifyHash, src_dir + '/' + src_paths[i], src_hashes[i]) + + # Lookup a Sources.bz2 file + idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' + + '/var/lib/apt/lists/' + self.releaseFile + + ' | grep -E " main/source/Sources.bz2$"' + ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n') + idx_path = '/' + self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')[:-7] + 'main/source/Sources.bz2' + + d = self.client.findHash(idx_path) + d.addCallback(self.verifyHash, idx_path, idx_hash) + + d.addBoth(lastDefer.callback) + return lastDefer + + def tearDown(self): + for p in self.pending_calls: + if p.active(): + p.cancel() + self.pending_calls = [] + self.client.cleanup() + self.client = None diff --git a/apt_p2p/CacheManager.py b/apt_p2p/CacheManager.py new file mode 100644 index 0000000..ccf13c5 --- /dev/null +++ b/apt_p2p/CacheManager.py @@ -0,0 +1,440 @@ + +"""Manage a cache of downloaded files. + +@var DECOMPRESS_EXTS: a list of file extensions that need to be decompressed +@var DECOMPRESS_FILES: a list of file names that need to be decompressed +""" + +from bz2 import BZ2Decompressor +from zlib import decompressobj, MAX_WBITS +from gzip import FCOMMENT, FEXTRA, FHCRC, FNAME, FTEXT +from urlparse import urlparse +import os + +from twisted.python import log +from twisted.python.filepath import FilePath +from twisted.internet import defer, reactor +from twisted.trial import unittest +from twisted.web2 import stream +from twisted.web2.http import splitHostPort + +from Hash import HashObject + +DECOMPRESS_EXTS = ['.gz', '.bz2'] +DECOMPRESS_FILES = ['release', 'sources', 'packages'] + +class ProxyFileStream(stream.SimpleStream): + """Saves a stream to a file while providing a new stream. + + Also optionally decompresses the file while it is being downloaded. 
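
ProxyFileStream's gzip handling below rests on a zlib detail: decompressobj(-MAX_WBITS) consumes a raw deflate stream, so the gzip header has to be stripped first (which _remove_gzip_header, further down in this class, does byte by byte). A round-trip sketch of the streaming half:

    import zlib

    raw = 'Packages index contents...' * 100

    # Produce a raw deflate stream (what follows a gzip file's header).
    comp = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS)
    data = comp.compress(raw) + comp.flush()

    # Decompress incrementally, as chunks arrive from the network.
    dec = zlib.decompressobj(-zlib.MAX_WBITS)
    out = ''
    for i in range(0, len(data), 64):
        out += dec.decompress(data[i:i + 64])
    out += dec.flush()
    assert out == raw
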
+ + @type stream: L{twisted.web2.stream.IByteStream} + @ivar stream: the input stream being read + @type outFile: L{twisted.python.filepath.FilePath} + @ivar outFile: the file being written + @type hash: L{Hash.HashObject} + @ivar hash: the hash object for the file + @type gzfile: C{file} + @ivar gzfile: the open file to write decompressed gzip data to + @type gzdec: L{zlib.decompressobj} + @ivar gzdec: the decompressor to use for the compressed gzip data + @type gzheader: C{boolean} + @ivar gzheader: whether the gzip header still needs to be removed from + the zlib compressed data + @type bz2file: C{file} + @ivar bz2file: the open file to write decompressed bz2 data to + @type bz2dec: L{bz2.BZ2Decompressor} + @ivar bz2dec: the decompressor to use for the compressed bz2 data + @type length: C{int} + @ivar length: the length of the original (compressed) file + @type doneDefer: L{twisted.internet.defer.Deferred} + @ivar doneDefer: the deferred that will fire when done streaming + + @group Stream implementation: read, close + + """ + + def __init__(self, stream, outFile, hash, decompress = None, decFile = None): + """Initializes the proxy. + + @type stream: L{twisted.web2.stream.IByteStream} + @param stream: the input stream to read from + @type outFile: L{twisted.python.filepath.FilePath} + @param outFile: the file to write to + @type hash: L{Hash.HashObject} + @param hash: the hash object to use for the file + @type decompress: C{string} + @param decompress: also decompress the file as this type + (currently only '.gz' and '.bz2' are supported) + @type decFile: C{twisted.python.FilePath} + @param decFile: the file to write the decompressed data to + """ + self.stream = stream + self.outFile = outFile.open('w') + self.hash = hash + self.hash.new() + self.gzfile = None + self.bz2file = None + if decompress == ".gz": + self.gzheader = True + self.gzfile = decFile.open('w') + self.gzdec = decompressobj(-MAX_WBITS) + elif decompress == ".bz2": + self.bz2file = decFile.open('w') + self.bz2dec = BZ2Decompressor() + self.length = self.stream.length + self.doneDefer = defer.Deferred() + + def _done(self): + """Close all the output files, return the result.""" + if not self.outFile.closed: + self.outFile.close() + self.hash.digest() + if self.gzfile: + # Finish the decompression + data_dec = self.gzdec.flush() + self.gzfile.write(data_dec) + self.gzfile.close() + self.gzfile = None + if self.bz2file: + self.bz2file.close() + self.bz2file = None + + self.doneDefer.callback(self.hash) + + def read(self): + """Read some data from the stream.""" + if self.outFile.closed: + return None + + # Read data from the stream, deal with the possible deferred + data = self.stream.read() + if isinstance(data, defer.Deferred): + data.addCallbacks(self._write, self._done) + return data + + self._write(data) + return data + + def _write(self, data): + """Write the stream data to the file and return it for others to use. + + Also optionally decompresses it. 
+ """ + if data is None: + self._done() + return data + + # Write and hash the streamed data + self.outFile.write(data) + self.hash.update(data) + + if self.gzfile: + # Decompress the zlib portion of the file + if self.gzheader: + # Remove the gzip header junk + self.gzheader = False + new_data = self._remove_gzip_header(data) + dec_data = self.gzdec.decompress(new_data) + else: + dec_data = self.gzdec.decompress(data) + self.gzfile.write(dec_data) + if self.bz2file: + # Decompress the bz2 file + dec_data = self.bz2dec.decompress(data) + self.bz2file.write(dec_data) + + return data + + def _remove_gzip_header(self, data): + """Remove the gzip header from the zlib compressed data.""" + # Read, check & discard the header fields + if data[:2] != '\037\213': + raise IOError, 'Not a gzipped file' + if ord(data[2]) != 8: + raise IOError, 'Unknown compression method' + flag = ord(data[3]) + # modtime = self.fileobj.read(4) + # extraflag = self.fileobj.read(1) + # os = self.fileobj.read(1) + + skip = 10 + if flag & FEXTRA: + # Read & discard the extra field + xlen = ord(data[10]) + xlen = xlen + 256*ord(data[11]) + skip = skip + 2 + xlen + if flag & FNAME: + # Read and discard a null-terminated string containing the filename + while True: + if not data[skip] or data[skip] == '\000': + break + skip += 1 + skip += 1 + if flag & FCOMMENT: + # Read and discard a null-terminated string containing a comment + while True: + if not data[skip] or data[skip] == '\000': + break + skip += 1 + skip += 1 + if flag & FHCRC: + skip += 2 # Read & discard the 16-bit header CRC + + return data[skip:] + + def close(self): + """Clean everything up and return None to future reads.""" + self.length = 0 + self._done() + self.stream.close() + +class CacheManager: + """Manages all downloaded files and requests for cached objects. + + @type cache_dir: L{twisted.python.filepath.FilePath} + @ivar cache_dir: the directory to use for storing all files + @type other_dirs: C{list} of L{twisted.python.filepath.FilePath} + @ivar other_dirs: the other directories that have shared files in them + @type all_dirs: C{list} of L{twisted.python.filepath.FilePath} + @ivar all_dirs: all the directories that have cached files in them + @type db: L{db.DB} + @ivar db: the database to use for tracking files and hashes + @type manager: L{apt_p2p.AptP2P} + @ivar manager: the main program object to send requests to + @type scanning: C{list} of L{twisted.python.filepath.FilePath} + @ivar scanning: all the directories that are currectly being scanned or waiting to be scanned + """ + + def __init__(self, cache_dir, db, other_dirs = [], manager = None): + """Initialize the instance and remove any untracked files from the DB.. 
+ + @type cache_dir: L{twisted.python.filepath.FilePath} + @param cache_dir: the directory to use for storing all files + @type db: L{db.DB} + @param db: the database to use for tracking files and hashes + @type other_dirs: C{list} of L{twisted.python.filepath.FilePath} + @param other_dirs: the other directories that have shared files in them + (optional, defaults to only using the cache directory) + @type manager: L{apt_p2p.AptP2P} + @param manager: the main program object to send requests to + (optional, defaults to not calling back with cached files) + """ + self.cache_dir = cache_dir + self.other_dirs = other_dirs + self.all_dirs = self.other_dirs[:] + self.all_dirs.insert(0, self.cache_dir) + self.db = db + self.manager = manager + self.scanning = [] + + # Init the database, remove old files + self.db.removeUntrackedFiles(self.all_dirs) + + #{ Scanning directories + def scanDirectories(self): + """Scan the cache directories, hashing new and rehashing changed files.""" + assert not self.scanning, "a directory scan is already under way" + self.scanning = self.all_dirs[:] + self._scanDirectories() + + def _scanDirectories(self, result = None, walker = None): + """Walk each directory looking for cached files. + + @param result: the result of a DHT store request, not used (optional) + @param walker: the walker to use to traverse the current directory + (optional, defaults to creating a new walker from the first + directory in the L{CacheManager.scanning} list) + """ + # Need to start walking a new directory + if walker is None: + # If there are any left, get them + if self.scanning: + log.msg('started scanning directory: %s' % self.scanning[0].path) + walker = self.scanning[0].walk() + else: + log.msg('cache directory scan complete') + return + + try: + # Get the next file in the directory + file = walker.next() + except StopIteration: + # No files left, go to the next directory + log.msg('done scanning directory: %s' % self.scanning[0].path) + self.scanning.pop(0) + reactor.callLater(0, self._scanDirectories) + return + + # If it's not a file ignore it + if not file.isfile(): + log.msg('entering directory: %s' % file.path) + reactor.callLater(0, self._scanDirectories, None, walker) + return + + # If it's already properly in the DB, ignore it + db_status = self.db.isUnchanged(file) + if db_status: + log.msg('file is unchanged: %s' % file.path) + reactor.callLater(0, self._scanDirectories, None, walker) + return + + # Don't hash files in the cache that are not in the DB + if self.scanning[0] == self.cache_dir: + if db_status is None: + log.msg('ignoring unknown cache file: %s' % file.path) + else: + log.msg('removing changed cache file: %s' % file.path) + file.remove() + reactor.callLater(0, self._scanDirectories, None, walker) + return + + # Otherwise hash it + log.msg('start hash checking file: %s' % file.path) + hash = HashObject() + df = hash.hashInThread(file) + df.addBoth(self._doneHashing, file, walker) + df.addErrback(log.err) + + def _doneHashing(self, result, file, walker): + """If successful, add the hashed file to the DB and inform the main program.""" + if isinstance(result, HashObject): + log.msg('hash check of %s completed with hash: %s' % (file.path, result.hexdigest())) + + # Only set a URL if this is a downloaded file + url = None + if self.scanning[0] == self.cache_dir: + url = 'http:/' + file.path[len(self.cache_dir.path):] + + # Store the hashed file in the database + new_hash = self.db.storeFile(file, result.digest()) + + # Tell the main program to handle the new cache 
file + df = self.manager.new_cached_file(file, result, new_hash, url, True) + if df is None: + reactor.callLater(0, self._scanDirectories, None, walker) + else: + df.addBoth(self._scanDirectories, walker) + else: + # Must have returned an error + log.msg('hash check of %s failed' % file.path) + log.err(result) + reactor.callLater(0, self._scanDirectories, None, walker) + + #{ Downloading files + def save_file(self, response, hash, url): + """Save a downloaded file to the cache and stream it. + + @type response: L{twisted.web2.http.Response} + @param response: the response from the download + @type hash: L{Hash.HashObject} + @param hash: the hash object containing the expected hash for the file + @param url: the URI of the actual mirror request + @rtype: L{twisted.web2.http.Response} + @return: the final response from the download + """ + if response.code != 200: + log.msg('File was not found (%r): %s' % (response, url)) + return response + + log.msg('Returning file: %s' % url) + + # Set the destination path for the file + parsed = urlparse(url) + destFile = self.cache_dir.preauthChild(parsed[1] + parsed[2]) + log.msg('Saving returned %r byte file to cache: %s' % (response.stream.length, destFile.path)) + + # Make sure there's a free place for the file + if destFile.exists(): + log.msg('File already exists, removing: %s' % destFile.path) + destFile.remove() + elif not destFile.parent().exists(): + destFile.parent().makedirs() + + # Determine whether it needs to be decompressed and how + root, ext = os.path.splitext(destFile.basename()) + if root.lower() in DECOMPRESS_FILES and ext.lower() in DECOMPRESS_EXTS: + ext = ext.lower() + decFile = destFile.sibling(root) + log.msg('Decompressing to: %s' % decFile.path) + if decFile.exists(): + log.msg('File already exists, removing: %s' % decFile.path) + decFile.remove() + else: + ext = None + decFile = None + + # Create the new stream from the old one. + orig_stream = response.stream + response.stream = ProxyFileStream(orig_stream, destFile, hash, ext, decFile) + response.stream.doneDefer.addCallback(self._save_complete, url, destFile, + response.headers.getHeader('Last-Modified'), + decFile) + response.stream.doneDefer.addErrback(self.save_error, url) + + # Return the modified response with the new stream + return response + + def _save_complete(self, hash, url, destFile, modtime = None, decFile = None): + """Update the modification time and inform the main program. 
+
+        @type hash: L{Hash.HashObject}
+        @param hash: the hash object containing the expected hash for the file
+        @param url: the URI of the actual mirror request
+        @type destFile: C{twisted.python.FilePath}
+        @param destFile: the file where the download was written to
+        @type modtime: C{int}
+        @param modtime: the modified time of the cached file (seconds since epoch)
+            (optional, defaults to not setting the modification time of the file)
+        @type decFile: C{twisted.python.FilePath}
+        @param decFile: the file where the decompressed download was written to
+            (optional, defaults to the file not having been compressed)
+        """
+        if modtime:
+            os.utime(destFile.path, (modtime, modtime))
+            if decFile:
+                os.utime(decFile.path, (modtime, modtime))
+
+        result = hash.verify()
+        if result or result is None:
+            if result:
+                log.msg('Hashes match: %s' % url)
+            else:
+                log.msg('Hashed file to %s: %s' % (hash.hexdigest(), url))
+
+            new_hash = self.db.storeFile(destFile, hash.digest())
+            log.msg('now available: %s' % (url))
+
+            if self.manager:
+                self.manager.new_cached_file(destFile, hash, new_hash, url)
+                if decFile:
+                    ext_len = len(destFile.path) - len(decFile.path)
+                    self.manager.new_cached_file(decFile, None, False, url[:-ext_len])
+        else:
+            log.msg("Hashes don't match %s != %s: %s" % (hash.hexexpected(), hash.hexdigest(), url))
+            destFile.remove()
+            if decFile:
+                decFile.remove()
+
+    def save_error(self, failure, url):
+        """An error has occurred in downloading or saving the file."""
+        log.msg('Error occurred downloading %s' % url)
+        log.err(failure)
+        return failure
+
+class TestMirrorManager(unittest.TestCase):
+    """Unit tests for the mirror manager."""
+
+    timeout = 20
+    pending_calls = []
+    client = None
+
+    def setUp(self):
+        self.client = CacheManager(FilePath('/tmp/.apt-p2p'))
+
+    def tearDown(self):
+        for p in self.pending_calls:
+            if p.active():
+                p.cancel()
+        self.client = None
+        
\ No newline at end of file
diff --git a/apt_p2p/HTTPDownloader.py b/apt_p2p/HTTPDownloader.py
new file mode 100644
index 0000000..eb36932
--- /dev/null
+++ b/apt_p2p/HTTPDownloader.py
@@ -0,0 +1,423 @@
+
+"""Manage all download requests to a single site."""
+
+from math import exp
+from datetime import datetime, timedelta
+
+from twisted.internet import reactor, defer, protocol
+from twisted.internet.protocol import ClientFactory
+from twisted import version as twisted_version
+from twisted.python import log
+from twisted.web2.client.interfaces import IHTTPClientManager
+from twisted.web2.client.http import ProtocolError, ClientRequest, HTTPClientProtocol
+from twisted.web2 import stream as stream_mod, http_headers
+from twisted.web2 import version as web2_version
+from twisted.trial import unittest
+from zope.interface import implements
+
+from apt_p2p_conf import version
+
+class Peer(ClientFactory):
+    """A manager for all HTTP requests to a single peer.
+
+    Controls all requests that go to a single peer (host and port).
+    This includes buffering requests until they can be sent and reconnecting
+    in the event of the connection being closed.
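+
+    A minimal usage sketch (illustrative only; assumes a running Twisted
+    reactor and uses just the get() and close() methods defined below)::
+
+        peer = Peer('www.ietf.org', 80)       # one Peer per host:port
+        d = peer.get('/rfc/rfc0013.txt')      # queued; connects on demand
+        d.addCallback(lambda resp: log.msg('got response code %d' % resp.code))
+        d.addBoth(lambda _: peer.close())     # drop the connection when done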
+ + """ + + implements(IHTTPClientManager) + + def __init__(self, host, port=80): + self.host = host + self.port = port + self.busy = False + self.pipeline = False + self.closed = True + self.connecting = False + self.request_queue = [] + self.response_queue = [] + self.proto = None + self.connector = None + self._errors = 0 + self._completed = 0 + self._downloadSpeeds = [] + self._lastResponse = None + self._responseTimes = [] + + #{ Manage the request queue + def connect(self): + """Connect to the peer.""" + assert self.closed and not self.connecting + self.connecting = True + d = protocol.ClientCreator(reactor, HTTPClientProtocol, self).connectTCP(self.host, self.port) + d.addCallback(self.connected) + + def connected(self, proto): + """Begin processing the queued requests.""" + self.closed = False + self.connecting = False + self.proto = proto + self.processQueue() + + def close(self): + """Close the connection to the peer.""" + if not self.closed: + self.proto.transport.loseConnection() + + def submitRequest(self, request): + """Add a new request to the queue. + + @type request: L{twisted.web2.client.http.ClientRequest} + @return: deferred that will fire with the completed request + """ + request.submissionTime = datetime.now() + request.deferRequest = defer.Deferred() + self.request_queue.append(request) + self.processQueue() + return request.deferRequest + + def processQueue(self): + """Check the queue to see if new requests can be sent to the peer.""" + if not self.request_queue: + return + if self.connecting: + return + if self.closed: + self.connect() + return + if self.busy and not self.pipeline: + return + if self.response_queue and not self.pipeline: + return + + req = self.request_queue.pop(0) + self.response_queue.append(req) + req.deferResponse = self.proto.submitRequest(req, False) + req.deferResponse.addCallbacks(self.requestComplete, self.requestError) + + def requestComplete(self, resp): + """Process a completed request.""" + self._processLastResponse() + req = self.response_queue.pop(0) + log.msg('%s of %s completed with code %d' % (req.method, req.uri, resp.code)) + self._completed += 1 + if resp.code >= 400: + self._errors += 1 + now = datetime.now() + self._responseTimes.append((now, now - req.submissionTime)) + self._lastResponse = (now, resp.stream.length) + req.deferRequest.callback(resp) + + def requestError(self, error): + """Process a request that ended with an error.""" + self._processLastResponse() + req = self.response_queue.pop(0) + log.msg('Download of %s generated error %r' % (req.uri, error)) + self._completed += 1 + self._errors += 1 + req.deferRequest.errback(error) + + def hashError(self, error): + """Log that a hash error occurred from the peer.""" + log.msg('Hash error from peer (%s, %d): %r' % (self.host, self.port, error)) + self._errors += 1 + + #{ IHTTPClientManager interface + def clientBusy(self, proto): + """Save the busy state.""" + self.busy = True + + def clientIdle(self, proto): + """Try to send a new request.""" + self._processLastResponse() + self.busy = False + self.processQueue() + + def clientPipelining(self, proto): + """Try to send a new request.""" + self.pipeline = True + self.processQueue() + + def clientGone(self, proto): + """Mark sent requests as errors.""" + self._processLastResponse() + for req in self.response_queue: + req.deferRequest.errback(ProtocolError('lost connection')) + self.busy = False + self.pipeline = False + self.closed = True + self.connecting = False + self.response_queue = [] + self.proto = None + if 
self.request_queue: + self.processQueue() + + #{ Downloading request interface + def setCommonHeaders(self): + """Get the common HTTP headers for all requests.""" + headers = http_headers.Headers() + headers.setHeader('Host', self.host) + headers.setHeader('User-Agent', 'apt-p2p/%s (twisted/%s twisted.web2/%s)' % + (version.short(), twisted_version.short(), web2_version.short())) + return headers + + def get(self, path, method="GET", modtime=None): + """Add a new request to the queue. + + @type path: C{string} + @param path: the path to request from the peer + @type method: C{string} + @param method: the HTTP method to use, 'GET' or 'HEAD' + (optional, defaults to 'GET') + @type modtime: C{int} + @param modtime: the modification time to use for an 'If-Modified-Since' + header, as seconds since the epoch + (optional, defaults to not sending that header) + """ + headers = self.setCommonHeaders() + if modtime: + headers.setHeader('If-Modified-Since', modtime) + return self.submitRequest(ClientRequest(method, path, headers, None)) + + def getRange(self, path, rangeStart, rangeEnd, method="GET"): + """Add a new request with a Range header to the queue. + + @type path: C{string} + @param path: the path to request from the peer + @type rangeStart: C{int} + @param rangeStart: the byte to begin the request at + @type rangeEnd: C{int} + @param rangeEnd: the byte to end the request at (inclusive) + @type method: C{string} + @param method: the HTTP method to use, 'GET' or 'HEAD' + (optional, defaults to 'GET') + """ + headers = self.setCommonHeaders() + headers.setHeader('Range', ('bytes', [(rangeStart, rangeEnd)])) + return self.submitRequest(ClientRequest(method, path, headers, None)) + + #{ Peer information + def isIdle(self): + """Check whether the peer is idle or not.""" + return not self.busy and not self.request_queue and not self.response_queue + + def _processLastResponse(self): + """Save the download time of the last request for speed calculations.""" + if self._lastResponse is not None: + now = datetime.now() + self._downloadSpeeds.append((now, now - self._lastResponse[0], self._lastResponse[1])) + self._lastResponse = None + + def downloadSpeed(self): + """Gets the latest average download speed for the peer. + + The average is over the last 10 responses that occurred in the last hour. + """ + total_time = 0.0 + total_download = 0 + now = datetime.now() + while self._downloadSpeeds and (len(self._downloadSpeeds) > 10 or + now - self._downloadSpeeds[0][0] > timedelta(seconds=3600)): + self._downloadSpeeds.pop(0) + + # If there are none, then you get 0 + if not self._downloadSpeeds: + return 0.0 + + for download in self._downloadSpeeds: + total_time += download[1].days*86400.0 + download[1].seconds + download[1].microseconds/1000000.0 + total_download += download[2] + + return total_download / total_time + + def responseTime(self): + """Gets the latest average response time for the peer. + + Response time is the time from receiving the request, to the time + the download begins. The average is over the last 10 responses that + occurred in the last hour. 
+ """ + total_response = 0.0 + now = datetime.now() + while self._responseTimes and (len(self._responseTimes) > 10 or + now - self._responseTimes[0][0] > timedelta(seconds=3600)): + self._responseTimes.pop(0) + + # If there are none, give it the benefit of the doubt + if not self._responseTimes: + return 0.0 + + for response in self._responseTimes: + total_response += response[1].days*86400.0 + response[1].seconds + response[1].microseconds/1000000.0 + + return total_response / len(self._responseTimes) + + def rank(self, fastest): + """Determine the ranking value for the peer. + + The ranking value is composed of 5 numbers: + - 1 if a connection to the peer is open, 0.9 otherwise + - 1 if there are no pending requests, to 0 if there are a maximum + - 1 if the peer is the fastest of all peers, to 0 if the speed is 0 + - 1 if all requests are good, 0 if all produced errors + - an exponentially decreasing number based on the response time + """ + rank = 1.0 + if self.closed: + rank *= 0.9 + rank *= (max(0.0, 10.0 - len(self.request_queue) - len(self.response_queue))) / 10.0 + if fastest > 0.0: + rank *= min(1.0, self.downloadSpeed() / fastest) + if self._completed: + rank *= max(0.0, 1.0 - float(self._errors) / self._completed) + rank *= exp(-self.responseTime() / 5.0) + return rank + +class TestClientManager(unittest.TestCase): + """Unit tests for the Peer.""" + + client = None + pending_calls = [] + + def gotResp(self, resp, num, expect): + self.failUnless(resp.code >= 200 and resp.code < 300, "Got a non-200 response: %r" % resp.code) + if expect is not None: + self.failUnless(resp.stream.length == expect, "Length was incorrect, got %r, expected %r" % (resp.stream.length, expect)) + def print_(n): + pass + def printdone(n): + pass + stream_mod.readStream(resp.stream, print_).addCallback(printdone) + + def test_download(self): + """Tests a normal download.""" + host = 'www.ietf.org' + self.client = Peer(host, 80) + self.timeout = 10 + + d = self.client.get('/rfc/rfc0013.txt') + d.addCallback(self.gotResp, 1, 1070) + return d + + def test_head(self): + """Tests a 'HEAD' request.""" + host = 'www.ietf.org' + self.client = Peer(host, 80) + self.timeout = 10 + + d = self.client.get('/rfc/rfc0013.txt', "HEAD") + d.addCallback(self.gotResp, 1, 0) + return d + + def test_multiple_downloads(self): + """Tests multiple downloads with queueing and connection closing.""" + host = 'www.ietf.org' + self.client = Peer(host, 80) + self.timeout = 120 + lastDefer = defer.Deferred() + + def newRequest(path, num, expect, last=False): + d = self.client.get(path) + d.addCallback(self.gotResp, num, expect) + if last: + d.addBoth(lastDefer.callback) + + # 3 quick requests + newRequest("/rfc/rfc0006.txt", 1, 1776) + newRequest("/rfc/rfc2362.txt", 2, 159833) + newRequest("/rfc/rfc0801.txt", 3, 40824) + + # This one will probably be queued + self.pending_calls.append(reactor.callLater(1, newRequest, '/rfc/rfc0013.txt', 4, 1070)) + + # Connection should still be open, but idle + self.pending_calls.append(reactor.callLater(10, newRequest, '/rfc/rfc0022.txt', 5, 4606)) + + #Connection should be closed + self.pending_calls.append(reactor.callLater(30, newRequest, '/rfc/rfc0048.txt', 6, 41696)) + self.pending_calls.append(reactor.callLater(31, newRequest, '/rfc/rfc3261.txt', 7, 647976)) + self.pending_calls.append(reactor.callLater(32, newRequest, '/rfc/rfc0014.txt', 8, 27)) + self.pending_calls.append(reactor.callLater(32, newRequest, '/rfc/rfc0001.txt', 9, 21088)) + + # Now it should definitely be closed + 
self.pending_calls.append(reactor.callLater(62, newRequest, '/rfc/rfc2801.txt', 0, 598794, True)) + return lastDefer + + def test_multiple_quick_downloads(self): + """Tests lots of multiple downloads with queueing.""" + host = 'www.ietf.org' + self.client = Peer(host, 80) + self.timeout = 30 + lastDefer = defer.Deferred() + + def newRequest(path, num, expect, last=False): + d = self.client.get(path) + d.addCallback(self.gotResp, num, expect) + if last: + d.addBoth(lastDefer.callback) + + newRequest("/rfc/rfc0006.txt", 1, 1776) + newRequest("/rfc/rfc2362.txt", 2, 159833) + newRequest("/rfc/rfc0801.txt", 3, 40824) + self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0013.txt', 4, 1070)) + self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0022.txt', 5, 4606)) + self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0048.txt', 6, 41696)) + self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc3261.txt', 7, 647976)) + self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0014.txt', 8, 27)) + self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0001.txt', 9, 21088)) + self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc2801.txt', 0, 598794, True)) + return lastDefer + + def checkInfo(self): + log.msg('Rank is: %r' % self.client.rank(250.0*1024)) + log.msg('Download speed is: %r' % self.client.downloadSpeed()) + log.msg('Response Time is: %r' % self.client.responseTime()) + + def test_peer_info(self): + """Test retrieving the peer info during a download.""" + host = 'www.ietf.org' + self.client = Peer(host, 80) + self.timeout = 120 + lastDefer = defer.Deferred() + + def newRequest(path, num, expect, last=False): + d = self.client.get(path) + d.addCallback(self.gotResp, num, expect) + if last: + d.addBoth(lastDefer.callback) + + newRequest("/rfc/rfc0006.txt", 1, 1776) + newRequest("/rfc/rfc2362.txt", 2, 159833) + newRequest("/rfc/rfc0801.txt", 3, 40824) + self.pending_calls.append(reactor.callLater(1, newRequest, '/rfc/rfc0013.txt', 4, 1070)) + self.pending_calls.append(reactor.callLater(10, newRequest, '/rfc/rfc0022.txt', 5, 4606)) + self.pending_calls.append(reactor.callLater(30, newRequest, '/rfc/rfc0048.txt', 6, 41696)) + self.pending_calls.append(reactor.callLater(31, newRequest, '/rfc/rfc3261.txt', 7, 647976)) + self.pending_calls.append(reactor.callLater(32, newRequest, '/rfc/rfc0014.txt', 8, 27)) + self.pending_calls.append(reactor.callLater(32, newRequest, '/rfc/rfc0001.txt', 9, 21088)) + self.pending_calls.append(reactor.callLater(62, newRequest, '/rfc/rfc2801.txt', 0, 598794, True)) + + for i in xrange(2, 122, 2): + self.pending_calls.append(reactor.callLater(i, self.checkInfo)) + + return lastDefer + + def test_range(self): + """Test a Range request.""" + host = 'www.ietf.org' + self.client = Peer(host, 80) + self.timeout = 10 + + d = self.client.getRange('/rfc/rfc0013.txt', 100, 199) + d.addCallback(self.gotResp, 1, 100) + return d + + def tearDown(self): + for p in self.pending_calls: + if p.active(): + p.cancel() + self.pending_calls = [] + if self.client: + self.client.close() + self.client = None diff --git a/apt_p2p/HTTPServer.py b/apt_p2p/HTTPServer.py new file mode 100644 index 0000000..d252a63 --- /dev/null +++ b/apt_p2p/HTTPServer.py @@ -0,0 +1,242 @@ + +"""Serve local requests from apt and remote requests from peers.""" + +from urllib import unquote_plus +from binascii import b2a_hex + +from twisted.python import log +from twisted.internet import defer +from 
twisted.web2 import server, http, resource, channel, stream
+from twisted.web2 import static, http_headers, responsecode
+
+from policies import ThrottlingFactory
+from apt_p2p_Khashmir.bencode import bencode
+
+class FileDownloader(static.File):
+    """Modified to make it suitable for apt requests.
+
+    Tries to find requests in the cache. Found files are first checked for
+    freshness before being sent. Requests for missing and stale files are
+    forwarded to the main program for downloading.
+
+    @type manager: L{apt_p2p.AptP2P}
+    @ivar manager: the main program to query
+    """
+
+    def __init__(self, path, manager, defaultType="text/plain", ignoredExts=(), processors=None, indexNames=None):
+        self.manager = manager
+        super(FileDownloader, self).__init__(path, defaultType, ignoredExts, processors, indexNames)
+
+    def renderHTTP(self, req):
+        log.msg('Got request for %s from %s' % (req.uri, req.remoteAddr))
+        resp = super(FileDownloader, self).renderHTTP(req)
+        if isinstance(resp, defer.Deferred):
+            resp.addCallback(self._renderHTTP_done, req)
+        else:
+            resp = self._renderHTTP_done(resp, req)
+        return resp
+
+    def _renderHTTP_done(self, resp, req):
+        log.msg('Initial response to %s: %r' % (req.uri, resp))
+
+        if self.manager:
+            path = 'http:/' + req.uri
+            if resp.code >= 200 and resp.code < 400:
+                return self.manager.check_freshness(req, path, resp.headers.getHeader('Last-Modified'), resp)
+
+            log.msg('Not found, trying other methods for %s' % req.uri)
+            return self.manager.get_resp(req, path)
+
+        return resp
+
+    def createSimilarFile(self, path):
+        return self.__class__(path, self.manager, self.defaultType, self.ignoredExts,
+                              self.processors, self.indexNames[:])
+
+class FileUploaderStream(stream.FileStream):
+    """Modified to make it suitable for streaming to peers.
+
+    Streams the file in small chunks to make it easier to throttle the
+    streaming to peers.
+
+    @ivar CHUNK_SIZE: the size of chunks of data to send at a time
+    """
+
+    CHUNK_SIZE = 4*1024
+
+    def read(self, sendfile=False):
+        if self.f is None:
+            return None
+
+        length = self.length
+        if length == 0:
+            self.f = None
+            return None
+
+        # Remove the SendFileBuffer and mmap use, just use string reads and writes
+
+        readSize = min(length, self.CHUNK_SIZE)
+
+        self.f.seek(self.start)
+        b = self.f.read(readSize)
+        bytesRead = len(b)
+        if not bytesRead:
+            raise RuntimeError("Ran out of data reading file %r, expected %d more bytes" % (self.f, length))
+        else:
+            self.length -= bytesRead
+            self.start += bytesRead
+            return b
+
+
+class FileUploader(static.File):
+    """Modified to make it suitable for peer requests.
+
+    Uses the modified L{FileUploaderStream} to stream the file for throttling,
+    and doesn't do any listing of directory contents.
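+
+    A minimal sketch of the chunked streaming this class relies on
+    (illustrative only; the file name is hypothetical)::
+
+        f = open('/tmp/example.deb')             # any ordinary file
+        s = FileUploaderStream(f, 0, 10*1024)    # serve 10 KiB from offset 0
+        chunk = s.read()                         # returns at most CHUNK_SIZE (4 KiB)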
+ """ + + def render(self, req): + if not self.fp.exists(): + return responsecode.NOT_FOUND + + if self.fp.isdir(): + # Don't try to render a directory listing + return responsecode.NOT_FOUND + + try: + f = self.fp.open() + except IOError, e: + import errno + if e[0] == errno.EACCES: + return responsecode.FORBIDDEN + elif e[0] == errno.ENOENT: + return responsecode.NOT_FOUND + else: + raise + + response = http.Response() + # Use the modified FileStream + response.stream = FileUploaderStream(f, 0, self.fp.getsize()) + + for (header, value) in ( + ("content-type", self.contentType()), + ("content-encoding", self.contentEncoding()), + ): + if value is not None: + response.headers.setHeader(header, value) + + return response + +class TopLevel(resource.Resource): + """The HTTP server for all requests, both from peers and apt. + + @type directory: L{twisted.python.filepath.FilePath} + @ivar directory: the directory to check for cached files + @type db: L{db.DB} + @ivar db: the database to use for looking up files and hashes + @type manager: L{apt_p2p.AptP2P} + @ivar manager: the main program object to send requests to + @type factory: L{twisted.web2.channel.HTTPFactory} or L{policies.ThrottlingFactory} + @ivar factory: the factory to use to server HTTP requests + + """ + + addSlash = True + + def __init__(self, directory, db, manager): + """Initialize the instance. + + @type directory: L{twisted.python.filepath.FilePath} + @param directory: the directory to check for cached files + @type db: L{db.DB} + @param db: the database to use for looking up files and hashes + @type manager: L{apt_p2p.AptP2P} + @param manager: the main program object to send requests to + """ + self.directory = directory + self.db = db + self.manager = manager + self.factory = None + + def getHTTPFactory(self): + """Initialize and get the factory for this HTTP server.""" + if self.factory is None: + self.factory = channel.HTTPFactory(server.Site(self), + **{'maxPipeline': 10, + 'betweenRequestsTimeOut': 60}) + self.factory = ThrottlingFactory(self.factory, writeLimit = 30*1024) + return self.factory + + def render(self, ctx): + """Render a web page with descriptive statistics.""" + return http.Response( + 200, + {'content-type': http_headers.MimeType('text', 'html')}, + """ +

+<html><body>
+<h2>Statistics</h2>
+<p>
TODO: eventually some stats will be shown here.""") + + def locateChild(self, request, segments): + """Process the incoming request.""" + log.msg('Got HTTP request for %s from %s' % (request.uri, request.remoteAddr)) + name = segments[0] + + # If the request is for a shared file (from a peer) + if name == '~': + if len(segments) != 2: + log.msg('Got a malformed request from %s' % request.remoteAddr) + return None, () + + # Find the file in the database + hash = unquote_plus(segments[1]) + files = self.db.lookupHash(hash) + if files: + # If it is a file, return it + if 'path' in files[0]: + log.msg('Sharing %s with %s' % (files[0]['path'].path, request.remoteAddr)) + return FileUploader(files[0]['path'].path), () + else: + # It's not for a file, but for a piece string, so return that + log.msg('Sending torrent string %s to %s' % (b2a_hex(hash), request.remoteAddr)) + return static.Data(bencode({'t': files[0]['pieces']}), 'application/x-bencoded'), () + else: + log.msg('Hash could not be found in database: %s' % hash) + + # Only local requests (apt) get past this point + if request.remoteAddr.host != "127.0.0.1": + log.msg('Blocked illegal access to %s from %s' % (request.uri, request.remoteAddr)) + return None, () + + if len(name) > 1: + # It's a request from apt + return FileDownloader(self.directory.path, self.manager), segments[0:] + else: + # Will render the statistics page + return self, () + + log.msg('Got a malformed request for "%s" from %s' % (request.uri, request.remoteAddr)) + return None, () + +if __name__ == '__builtin__': + # Running from twistd -ny HTTPServer.py + # Then test with: + # wget -S 'http://localhost:18080/~/whatever' + # wget -S 'http://localhost:18080/~/pieces' + + import os.path + from twisted.python.filepath import FilePath + + class DB: + def lookupHash(self, hash): + if hash == 'pieces': + return [{'pieces': 'abcdefghij0123456789\xca\xec\xb8\x0c\x00\xe7\x07\xf8~])\x8f\x9d\xe5_B\xff\x1a\xc4!'}] + return [{'path': FilePath(os.path.expanduser('~/school/optout'))}] + + t = TopLevel(FilePath(os.path.expanduser('~')), DB(), None) + factory = t.getHTTPFactory() + + # Standard twisted application Boilerplate + from twisted.application import service, strports + application = service.Application("demoserver") + s = strports.service('tcp:18080', factory) + s.setServiceParent(application) diff --git a/apt_p2p/Hash.py b/apt_p2p/Hash.py new file mode 100644 index 0000000..850f393 --- /dev/null +++ b/apt_p2p/Hash.py @@ -0,0 +1,342 @@ + +"""Hash and store hash information for a file. + +@var PIECE_SIZE: the piece size to use for hashing pieces of files + +""" + +from binascii import b2a_hex, a2b_hex +import sys + +from twisted.internet import threads, defer +from twisted.trial import unittest + +PIECE_SIZE = 512*1024 + +class HashError(ValueError): + """An error has occurred while hashing a file.""" + +class HashObject: + """Manages hashes and hashing for a file. 
+ + @ivar ORDER: the priority ordering of hashes, and how to extract them + + """ + + ORDER = [ {'name': 'sha1', + 'length': 20, + 'AptPkgRecord': 'SHA1Hash', + 'AptSrcRecord': False, + 'AptIndexRecord': 'SHA1', + 'old_module': 'sha', + 'hashlib_func': 'sha1', + }, + {'name': 'sha256', + 'length': 32, + 'AptPkgRecord': 'SHA256Hash', + 'AptSrcRecord': False, + 'AptIndexRecord': 'SHA256', + 'hashlib_func': 'sha256', + }, + {'name': 'md5', + 'length': 16, + 'AptPkgRecord': 'MD5Hash', + 'AptSrcRecord': True, + 'AptIndexRecord': 'MD5SUM', + 'old_module': 'md5', + 'hashlib_func': 'md5', + }, + ] + + def __init__(self, digest = None, size = None, pieces = ''): + """Initialize the hash object.""" + self.hashTypeNum = 0 # Use the first if nothing else matters + if sys.version_info < (2, 5): + # sha256 is not available in python before 2.5, remove it + for hashType in self.ORDER: + if hashType['name'] == 'sha256': + del self.ORDER[self.ORDER.index(hashType)] + break + + self.expHash = None + self.expHex = None + self.expSize = None + self.expNormHash = None + self.fileHasher = None + self.pieceHasher = None + self.fileHash = digest + self.pieceHash = [pieces[x:x+self.ORDER[self.hashTypeNum]['length']] + for x in xrange(0, len(pieces), self.ORDER[self.hashTypeNum]['length'])] + self.size = size + self.fileHex = None + self.fileNormHash = None + self.done = True + self.result = None + + #{ Hashing data + def new(self, force = False): + """Generate a new hashing object suitable for hashing a file. + + @param force: set to True to force creating a new object even if + the hash has been verified already + """ + if self.result is None or force: + self.result = None + self.done = False + self.fileHasher = self._new() + self.pieceHasher = None + self.fileHash = None + self.pieceHash = [] + self.size = 0 + self.fileHex = None + self.fileNormHash = None + + def _new(self): + """Create a new hashing object according to the hash type.""" + if sys.version_info < (2, 5): + mod = __import__(self.ORDER[self.hashTypeNum]['old_module'], globals(), locals(), []) + return mod.new() + else: + import hashlib + func = getattr(hashlib, self.ORDER[self.hashTypeNum]['hashlib_func']) + return func() + + def update(self, data): + """Add more data to the file hasher.""" + if self.result is None: + if self.done: + raise HashError, "Already done, you can't add more data after calling digest() or verify()" + if self.fileHasher is None: + raise HashError, "file hasher not initialized" + + if not self.pieceHasher and self.size + len(data) > PIECE_SIZE: + # Hash up to the piece size + self.fileHasher.update(data[:(PIECE_SIZE - self.size)]) + data = data[(PIECE_SIZE - self.size):] + self.size = PIECE_SIZE + + # Save the first piece digest and initialize a new piece hasher + self.pieceHash.append(self.fileHasher.digest()) + self.pieceHasher = self._new() + + if self.pieceHasher: + # Loop in case the data contains multiple pieces + piece_size = self.size % PIECE_SIZE + while piece_size + len(data) > PIECE_SIZE: + # Save the piece hash and start a new one + self.pieceHasher.update(data[:(PIECE_SIZE - piece_size)]) + self.pieceHash.append(self.pieceHasher.digest()) + self.pieceHasher = self._new() + + # Don't forget to hash the data normally + self.fileHasher.update(data[:(PIECE_SIZE - piece_size)]) + data = data[(PIECE_SIZE - piece_size):] + self.size += PIECE_SIZE - piece_size + piece_size = self.size % PIECE_SIZE + + # Hash any remaining data + self.pieceHasher.update(data) + + self.fileHasher.update(data) + self.size += len(data) + + 
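+
+    # A usage sketch (illustrative only) of how update() splits data into
+    # pieces: feeding 600 KiB through a fresh hasher yields one whole-file
+    # digest and two piece digests, since PIECE_SIZE is 512 KiB:
+    #
+    #     h = HashObject()
+    #     h.new()
+    #     h.update('x' * 600*1024)
+    #     whole = h.digest()           # SHA1 of all 600 KiB
+    #     pieces = h.pieceDigests()    # [SHA1 of first 512 KiB, SHA1 of last 88 KiB]
+    #     assert len(pieces) == 2
+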
def hashInThread(self, file): + """Hashes a file in a separate thread, returning a deferred that will callback with the result.""" + file.restat(False) + if not file.exists(): + df = defer.Deferred() + df.errback(HashError("file not found")) + return df + + df = threads.deferToThread(self._hashInThread, file) + return df + + def _hashInThread(self, file): + """Hashes a file, returning itself as the result.""" + f = file.open() + self.new(force = True) + data = f.read(4096) + while data: + self.update(data) + data = f.read(4096) + self.digest() + return self + + #{ Checking hashes of data + def pieceDigests(self): + """Get the piece hashes of the added file data.""" + self.digest() + return self.pieceHash + + def digest(self): + """Get the hash of the added file data.""" + if self.fileHash is None: + if self.fileHasher is None: + raise HashError, "you must hash some data first" + self.fileHash = self.fileHasher.digest() + self.done = True + + # Save the last piece hash + if self.pieceHasher: + self.pieceHash.append(self.pieceHasher.digest()) + return self.fileHash + + def hexdigest(self): + """Get the hash of the added file data in hex format.""" + if self.fileHex is None: + self.fileHex = b2a_hex(self.digest()) + return self.fileHex + + def verify(self): + """Verify that the added file data hash matches the expected hash.""" + if self.result is None and self.fileHash is not None and self.expHash is not None: + self.result = (self.fileHash == self.expHash and self.size == self.expSize) + return self.result + + #{ Expected hash + def expected(self): + """Get the expected hash.""" + return self.expHash + + def hexexpected(self): + """Get the expected hash in hex format.""" + if self.expHex is None and self.expHash is not None: + self.expHex = b2a_hex(self.expHash) + return self.expHex + + #{ Setting the expected hash + def set(self, hashType, hashHex, size): + """Initialize the hash object. + + @param hashType: must be one of the dictionaries from L{ORDER} + """ + self.hashTypeNum = self.ORDER.index(hashType) # error if not found + self.expHex = hashHex + self.expSize = int(size) + self.expHash = a2b_hex(self.expHex) + + def setFromIndexRecord(self, record): + """Set the hash from the cache of index file records. + + @type record: C{dictionary} + @param record: keys are hash types, values are tuples of (hash, size) + """ + for hashType in self.ORDER: + result = record.get(hashType['AptIndexRecord'], None) + if result: + self.set(hashType, result[0], result[1]) + return True + return False + + def setFromPkgRecord(self, record, size): + """Set the hash from Apt's binary packages cache. + + @param record: whatever is returned by apt_pkg.GetPkgRecords() + """ + for hashType in self.ORDER: + hashHex = getattr(record, hashType['AptPkgRecord'], None) + if hashHex: + self.set(hashType, hashHex, size) + return True + return False + + def setFromSrcRecord(self, record): + """Set the hash from Apt's source package records cache. + + Currently very simple since Apt only tracks MD5 hashes of source files. 
+ + @type record: (C{string}, C{int}, C{string}) + @param record: the hash, size and path of the source file + """ + for hashType in self.ORDER: + if hashType['AptSrcRecord']: + self.set(hashType, record[0], record[1]) + return True + return False + +class TestHashObject(unittest.TestCase): + """Unit tests for the hash objects.""" + + timeout = 5 + if sys.version_info < (2, 4): + skip = "skippingme" + + def test_failure(self): + """Tests that the hash object fails when treated badly.""" + h = HashObject() + h.set(h.ORDER[0], b2a_hex('12345678901234567890'), '0') + self.failUnlessRaises(HashError, h.digest) + self.failUnlessRaises(HashError, h.hexdigest) + self.failUnlessRaises(HashError, h.update, 'gfgf') + + def test_pieces(self): + """Tests the hashing of large files into pieces.""" + h = HashObject() + h.new() + h.update('1234567890'*120*1024) + self.failUnless(h.digest() == '1(j\xd2q\x0b\n\x91\xd2\x13\x90\x15\xa3E\xcc\xb0\x8d.\xc3\xc5') + pieces = h.pieceDigests() + self.failUnless(len(pieces) == 3) + self.failUnless(pieces[0] == ',G \xd8\xbbPl\xf1\xa3\xa0\x0cW\n\xe6\xe6a\xc9\x95/\xe5') + self.failUnless(pieces[1] == '\xf6V\xeb/\xa8\xad[\x07Z\xf9\x87\xa4\xf5w\xdf\xe1|\x00\x8e\x93') + self.failUnless(pieces[2] == 'M[\xbf\xee\xaa+\x19\xbaV\xf699\r\x17o\xcb\x8e\xcfP\x19') + h.new(True) + for i in xrange(120*1024): + h.update('1234567890') + pieces = h.pieceDigests() + self.failUnless(h.digest() == '1(j\xd2q\x0b\n\x91\xd2\x13\x90\x15\xa3E\xcc\xb0\x8d.\xc3\xc5') + self.failUnless(len(pieces) == 3) + self.failUnless(pieces[0] == ',G \xd8\xbbPl\xf1\xa3\xa0\x0cW\n\xe6\xe6a\xc9\x95/\xe5') + self.failUnless(pieces[1] == '\xf6V\xeb/\xa8\xad[\x07Z\xf9\x87\xa4\xf5w\xdf\xe1|\x00\x8e\x93') + self.failUnless(pieces[2] == 'M[\xbf\xee\xaa+\x19\xbaV\xf699\r\x17o\xcb\x8e\xcfP\x19') + + def test_sha1(self): + """Test hashing using the SHA1 hash.""" + h = HashObject() + found = False + for hashType in h.ORDER: + if hashType['name'] == 'sha1': + found = True + break + self.failUnless(found == True) + h.set(hashType, '3bba0a5d97b7946ad2632002bf9caefe2cb18e00', '19') + h.new() + h.update('apt-p2p is the best') + self.failUnless(h.hexdigest() == '3bba0a5d97b7946ad2632002bf9caefe2cb18e00') + self.failUnlessRaises(HashError, h.update, 'gfgf') + self.failUnless(h.verify() == True) + + def test_md5(self): + """Test hashing using the MD5 hash.""" + h = HashObject() + found = False + for hashType in h.ORDER: + if hashType['name'] == 'md5': + found = True + break + self.failUnless(found == True) + h.set(hashType, '6b5abdd30d7ed80edd229f9071d8c23c', '19') + h.new() + h.update('apt-p2p is the best') + self.failUnless(h.hexdigest() == '6b5abdd30d7ed80edd229f9071d8c23c') + self.failUnlessRaises(HashError, h.update, 'gfgf') + self.failUnless(h.verify() == True) + + def test_sha256(self): + """Test hashing using the SHA256 hash.""" + h = HashObject() + found = False + for hashType in h.ORDER: + if hashType['name'] == 'sha256': + found = True + break + self.failUnless(found == True) + h.set(hashType, '47f2238a30a0340faa2bf01a9bdc42ba77b07b411cda1e24cd8d7b5c4b7d82a7', '19') + h.new() + h.update('apt-p2p is the best') + self.failUnless(h.hexdigest() == '47f2238a30a0340faa2bf01a9bdc42ba77b07b411cda1e24cd8d7b5c4b7d82a7') + self.failUnlessRaises(HashError, h.update, 'gfgf') + self.failUnless(h.verify() == True) + + if sys.version_info < (2, 5): + test_sha256.skip = "SHA256 hashes are not supported by Python until version 2.5" diff --git a/apt_p2p/MirrorManager.py b/apt_p2p/MirrorManager.py new file mode 100644 index 
0000000..4c19f10
--- /dev/null
+++ b/apt_p2p/MirrorManager.py
@@ -0,0 +1,245 @@
+
+"""Manage the multiple mirrors that may be requested.
+
+@var aptpkg_dir: the name of the directory to use for mirror files
+"""
+
+from urlparse import urlparse
+import os
+
+from twisted.python import log
+from twisted.python.filepath import FilePath
+from twisted.internet import defer
+from twisted.trial import unittest
+from twisted.web2.http import splitHostPort
+
+from AptPackages import AptPackages
+
+aptpkg_dir='apt-packages'
+
+class MirrorError(Exception):
+    """Exception raised when there's a problem with the mirror."""
+
+class MirrorManager:
+    """Manages all requests for mirror information.
+
+    @type cache_dir: L{twisted.python.filepath.FilePath}
+    @ivar cache_dir: the directory to use for storing all files
+    @type unload_delay: C{int}
+    @ivar unload_delay: the time to wait before unloading the apt cache
+    @type apt_caches: C{dictionary}
+    @ivar apt_caches: the available mirrors
+    """
+
+    def __init__(self, cache_dir, unload_delay):
+        self.cache_dir = cache_dir
+        self.unload_delay = unload_delay
+        self.apt_caches = {}
+
+    def extractPath(self, url):
+        """Break the full URI down into the site, base directory and path.
+
+        Site is the host and port of the mirror. Base directory is the
+        directory to the mirror location (usually just '/debian'). Path is
+        the remaining path to get to the file.
+
+        E.g. http://ftp.debian.org/debian/dists/sid/binary-i386/Packages.bz2
+        would return ('ftp.debian.org:80', '/debian',
+        '/dists/sid/binary-i386/Packages.bz2').
+
+        @param url: the URI of the file's location on the mirror
+        @rtype: (C{string}, C{string}, C{string})
+        @return: the site, base directory and path to the file
+        """
+        # Extract the host and port
+        parsed = urlparse(url)
+        host, port = splitHostPort(parsed[0], parsed[1])
+        site = host + ":" + str(port)
+        path = parsed[2]
+
+        # Try to find the base directory (most can be found this way)
+        i = max(path.rfind('/dists/'), path.rfind('/pool/'))
+        if i >= 0:
+            baseDir = path[:i]
+            path = path[i:]
+        else:
+            # Uh oh, this is not good
+            log.msg("Couldn't find a good base directory for path: %s" % (site + path))
+
+            # Try to find an existing cache that starts with this one
+            # (fallback to using an empty base directory)
+            baseDir = ''
+            if site in self.apt_caches:
+                longest_match = 0
+                for base in self.apt_caches[site]:
+                    base_match = ''
+                    for dirs in path.split('/'):
+                        if base.startswith(base_match + '/' + dirs):
+                            base_match += '/' + dirs
+                        else:
+                            break
+                    if len(base_match) > longest_match:
+                        longest_match = len(base_match)
+                        baseDir = base_match
+                log.msg("Settled on baseDir: %s" % baseDir)
+
+        return site, baseDir, path
+
+    def init(self, site, baseDir):
+        """Make sure an L{AptPackages} exists for this mirror."""
+        if site not in self.apt_caches:
+            self.apt_caches[site] = {}
+
+        if baseDir not in self.apt_caches[site]:
+            site_cache = self.cache_dir.child(aptpkg_dir).child('mirrors').child(site + baseDir.replace('/', '_'))
+            # Create the cache directory if it doesn't already exist
+            if not site_cache.exists():
+                site_cache.makedirs()
+            self.apt_caches[site][baseDir] = AptPackages(site_cache, self.unload_delay)
+
+    def updatedFile(self, url, file_path):
+        """A file in the mirror has changed or been added.
+
+        @see: L{AptPackages.PackageFileList.update_file}
+        """
+        site, baseDir, path = self.extractPath(url)
+        self.init(site, baseDir)
+        self.apt_caches[site][baseDir].file_updated(path, file_path)
+
+    def findHash(self, url):
+        """Find the hash for a given url.
+ + @param url: the URI of the file's location on the mirror + @rtype: L{twisted.internet.defer.Deferred} + @return: a deferred that will fire with the returned L{Hash.HashObject} + """ + site, baseDir, path = self.extractPath(url) + if site in self.apt_caches and baseDir in self.apt_caches[site]: + return self.apt_caches[site][baseDir].findHash(path) + d = defer.Deferred() + d.errback(MirrorError("Site Not Found")) + return d + + def cleanup(self): + for site in self.apt_caches.keys(): + for baseDir in self.apt_caches[site].keys(): + self.apt_caches[site][baseDir].cleanup() + del self.apt_caches[site][baseDir] + del self.apt_caches[site] + +class TestMirrorManager(unittest.TestCase): + """Unit tests for the mirror manager.""" + + timeout = 20 + pending_calls = [] + client = None + + def setUp(self): + self.client = MirrorManager(FilePath('/tmp/.apt-p2p'), 300) + + def test_extractPath(self): + """Test extracting the site and base directory from various mirrors.""" + site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org/debian/dists/unstable/Release') + self.failUnless(site == "ftp.us.debian.org:80", "no match: %s" % site) + self.failUnless(baseDir == "/debian", "no match: %s" % baseDir) + self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path) + + site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org:16999/debian/pool/d/dpkg/dpkg_1.2.1-1.tar.gz') + self.failUnless(site == "ftp.us.debian.org:16999", "no match: %s" % site) + self.failUnless(baseDir == "/debian", "no match: %s" % baseDir) + self.failUnless(path == "/pool/d/dpkg/dpkg_1.2.1-1.tar.gz", "no match: %s" % path) + + site, baseDir, path = self.client.extractPath('http://debian.camrdale.org/dists/unstable/Release') + self.failUnless(site == "debian.camrdale.org:80", "no match: %s" % site) + self.failUnless(baseDir == "", "no match: %s" % baseDir) + self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path) + + def verifyHash(self, found_hash, path, true_hash): + self.failUnless(found_hash.hexexpected() == true_hash, + "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash)) + + def test_findHash(self): + """Tests finding the hash of an index file, binary package, source package, and another index file.""" + # Find the largest index files that are for 'main' + self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n') + self.sourcesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').read().rstrip('\n') + + # Find the Release file corresponding to the found Packages file + for f in os.walk('/var/lib/apt/lists').next()[2]: + if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]): + self.releaseFile = f + break + + # Add all the found files to the mirror + self.client.updatedFile('http://' + self.releaseFile.replace('_','/'), + FilePath('/var/lib/apt/lists/' + self.releaseFile)) + self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + + self.packagesFile[self.packagesFile.find('_dists_')+1:].replace('_','/'), + FilePath('/var/lib/apt/lists/' + self.packagesFile)) + self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + + self.sourcesFile[self.sourcesFile.find('_dists_')+1:].replace('_','/'), + FilePath('/var/lib/apt/lists/' + self.sourcesFile)) + + lastDefer = defer.Deferred() + + # Lookup a Packages.bz2 file + idx_hash = os.popen('grep 
-A 3000 -E "^SHA1:" ' + + '/var/lib/apt/lists/' + self.releaseFile + + ' | grep -E " main/binary-i386/Packages.bz2$"' + ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n') + idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2' + + d = self.client.findHash(idx_path) + d.addCallback(self.verifyHash, idx_path, idx_hash) + + # Lookup the binary 'dpkg' package + pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' + + '/var/lib/apt/lists/' + self.packagesFile + + ' | grep -E "^SHA1:" | head -n 1' + + ' | cut -d\ -f 2').read().rstrip('\n') + pkg_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + \ + os.popen('grep -A 30 -E "^Package: dpkg$" ' + + '/var/lib/apt/lists/' + self.packagesFile + + ' | grep -E "^Filename:" | head -n 1' + + ' | cut -d\ -f 2').read().rstrip('\n') + + d = self.client.findHash(pkg_path) + d.addCallback(self.verifyHash, pkg_path, pkg_hash) + + # Lookup the source 'dpkg' package + src_dir = os.popen('grep -A 30 -E "^Package: dpkg$" ' + + '/var/lib/apt/lists/' + self.sourcesFile + + ' | grep -E "^Directory:" | head -n 1' + + ' | cut -d\ -f 2').read().rstrip('\n') + src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' + + '/var/lib/apt/lists/' + self.sourcesFile + + ' | grep -A 4 -E "^Files:" | grep -E "^ " ' + + ' | cut -d\ -f 2').read().split('\n')[:-1] + src_paths = os.popen('grep -A 20 -E "^Package: dpkg$" ' + + '/var/lib/apt/lists/' + self.sourcesFile + + ' | grep -A 4 -E "^Files:" | grep -E "^ " ' + + ' | cut -d\ -f 4').read().split('\n')[:-1] + + for i in range(len(src_hashes)): + src_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + src_dir + '/' + src_paths[i] + d = self.client.findHash(src_path) + d.addCallback(self.verifyHash, src_path, src_hashes[i]) + + # Lookup a Sources.bz2 file + idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' + + '/var/lib/apt/lists/' + self.releaseFile + + ' | grep -E " main/source/Sources.bz2$"' + ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n') + idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/source/Sources.bz2' + + d = self.client.findHash(idx_path) + d.addCallback(self.verifyHash, idx_path, idx_hash) + + d.addBoth(lastDefer.callback) + return lastDefer + + def tearDown(self): + for p in self.pending_calls: + if p.active(): + p.cancel() + self.client.cleanup() + self.client = None + \ No newline at end of file diff --git a/apt_p2p/PeerManager.py b/apt_p2p/PeerManager.py new file mode 100644 index 0000000..faa0fe3 --- /dev/null +++ b/apt_p2p/PeerManager.py @@ -0,0 +1,153 @@ + +"""Manage a set of peers and the requests to them.""" + +from random import choice +from urlparse import urlparse, urlunparse +from urllib import quote_plus + +from twisted.internet import reactor, defer +from twisted.python import log +from twisted.trial import unittest +from twisted.web2 import stream as stream_mod +from twisted.web2.http import splitHostPort + +from HTTPDownloader import Peer +from util import uncompact + +class PeerManager: + """Manage a set of peers and the requests to them. + + @type clients: C{dictionary} + @ivar clients: the available peers that have been previously contacted + """ + + def __init__(self): + """Initialize the instance.""" + self.clients = {} + + def get(self, hash, mirror, peers = [], method="GET", modtime=None): + """Download from a list of peers or fallback to a mirror. 
+ + @type hash: L{Hash.HashObject} + @param hash: the hash object containing the expected hash for the file + @param mirror: the URI of the file on the mirror + @type peers: C{list} of C{string} + @param peers: a list of the peer info where the file can be found + (optional, defaults to downloading from the mirror) + @type method: C{string} + @param method: the HTTP method to use, 'GET' or 'HEAD' + (optional, defaults to 'GET') + @type modtime: C{int} + @param modtime: the modification time to use for an 'If-Modified-Since' + header, as seconds since the epoch + (optional, defaults to not sending that header) + """ + if peers: + # Choose one of the peers at random + compact_peer = choice(peers) + peer = uncompact(compact_peer['c']) + log.msg('Downloading from peer %r' % (peer, )) + site = peer + path = '/~/' + quote_plus(hash.expected()) + else: + log.msg('Downloading (%s) from mirror %s' % (method, mirror)) + parsed = urlparse(mirror) + assert parsed[0] == "http", "Only HTTP is supported, not '%s'" % parsed[0] + site = splitHostPort(parsed[0], parsed[1]) + path = urlunparse(('', '') + parsed[2:]) + + return self.getPeer(site, path, method, modtime) + + def getPeer(self, site, path, method="GET", modtime=None): + """Create a new peer if necessary and forward the request to it. + + @type site: (C{string}, C{int}) + @param site: the IP address and port of the peer + @type path: C{string} + @param path: the path to the file on the peer + @type method: C{string} + @param method: the HTTP method to use, 'GET' or 'HEAD' + (optional, defaults to 'GET') + @type modtime: C{int} + @param modtime: the modification time to use for an 'If-Modified-Since' + header, as seconds since the epoch + (optional, defaults to not sending that header) + """ + if site not in self.clients: + self.clients[site] = Peer(site[0], site[1]) + return self.clients[site].get(path, method, modtime) + + def close(self): + """Close all the connections to peers.""" + for site in self.clients: + self.clients[site].close() + self.clients = {} + +class TestPeerManager(unittest.TestCase): + """Unit tests for the PeerManager.""" + + manager = None + pending_calls = [] + + def gotResp(self, resp, num, expect): + self.failUnless(resp.code >= 200 and resp.code < 300, "Got a non-200 response: %r" % resp.code) + if expect is not None: + self.failUnless(resp.stream.length == expect, "Length was incorrect, got %r, expected %r" % (resp.stream.length, expect)) + def print_(n): + pass + def printdone(n): + pass + stream_mod.readStream(resp.stream, print_).addCallback(printdone) + + def test_download(self): + """Tests a normal download.""" + self.manager = PeerManager() + self.timeout = 10 + + host = 'www.ietf.org' + d = self.manager.get('', 'http://' + host + '/rfc/rfc0013.txt') + d.addCallback(self.gotResp, 1, 1070) + return d + + def test_head(self): + """Tests a 'HEAD' request.""" + self.manager = PeerManager() + self.timeout = 10 + + host = 'www.ietf.org' + d = self.manager.get('', 'http://' + host + '/rfc/rfc0013.txt', method = "HEAD") + d.addCallback(self.gotResp, 1, 0) + return d + + def test_multiple_downloads(self): + """Tests multiple downloads with queueing and connection closing.""" + self.manager = PeerManager() + self.timeout = 120 + lastDefer = defer.Deferred() + + def newRequest(host, path, num, expect, last=False): + d = self.manager.get('', 'http://' + host + ':' + str(80) + path) + d.addCallback(self.gotResp, num, expect) + if last: + d.addBoth(lastDefer.callback) + + newRequest('www.ietf.org', "/rfc/rfc0006.txt", 1, 1776) + 
newRequest('www.ietf.org', "/rfc/rfc2362.txt", 2, 159833) + newRequest('www.google.ca', "/", 3, None) + self.pending_calls.append(reactor.callLater(1, newRequest, 'www.sfu.ca', '/', 4, None)) + self.pending_calls.append(reactor.callLater(10, newRequest, 'www.ietf.org', '/rfc/rfc0048.txt', 5, 41696)) + self.pending_calls.append(reactor.callLater(30, newRequest, 'www.ietf.org', '/rfc/rfc0022.txt', 6, 4606)) + self.pending_calls.append(reactor.callLater(31, newRequest, 'www.sfu.ca', '/studentcentral/index.html', 7, None)) + self.pending_calls.append(reactor.callLater(32, newRequest, 'www.ietf.org', '/rfc/rfc0014.txt', 8, 27)) + self.pending_calls.append(reactor.callLater(32, newRequest, 'www.ietf.org', '/rfc/rfc0001.txt', 9, 21088)) + self.pending_calls.append(reactor.callLater(62, newRequest, 'www.google.ca', '/intl/en/options/', 0, None, True)) + return lastDefer + + def tearDown(self): + for p in self.pending_calls: + if p.active(): + p.cancel() + self.pending_calls = [] + if self.manager: + self.manager.close() + self.manager = None diff --git a/apt_p2p/__init__.py b/apt_p2p/__init__.py new file mode 100644 index 0000000..356924f --- /dev/null +++ b/apt_p2p/__init__.py @@ -0,0 +1,47 @@ + +"""The main apt-p2p modules. + +To run apt-p2p, you probably want to do something like:: + + from apt_p2p.apt_p2p import AptP2P + myapp = AptP2P(myDHT) + +where myDHT is a DHT that implements interfaces.IDHT. + +Diagram of the interaction between the given modules:: + + +---------------+ +-----------------------------------+ +------------- + | AptP2P | | DHT | | Internet + | |--->|join DHT|----|--\ + | |--->|loadConfig | | | Another + | |--->|getValue | | | Node + | |--->|storeValue DHT|<---|--/ + | |--->|leave | | + | | +-----------------------------------+ | + | | +-------------+ +----------------+ | + | | | PeerManager | | HTTPDownloader*| | + | |--->|get |--->|get HTTP|----|---> Mirror + | | | |--->|getRange | | + | |--->|close |--->|close HTTP|----|--\ + | | +-------------+ +----------------+ | | Another + | | +-----------------------------------+ | | Peer + | | | HTTPServer HTTP|<---|--/ + | |--->|getHTTPFactory | +------------- + |check_freshness|<---| | +------------- + | get_resp|<---| HTTP|<---|HTTP Request + | | +-----------------------------------+ | + | | +---------------+ +--------------+ | Local Net + | | | CacheManager | | ProxyFile- | | (apt) + | |--->|scanDirectories| | Stream* | | + | |--->|save_file |--->|__init__ HTTP|--->|HTTP Response + | |--->|save_error | | | +------------- + | | | | | | +------------- + |new_cached_file|<---| | | file|--->|write file + | | +---------------+ +--------------+ | + | | +---------------+ +--------------+ | Filesystem + | | | MirrorManager | | AptPackages* | | + | |--->|updatedFile |--->|file_updated | | + | |--->|findHash |--->|findHash file|<---|read file + +---------------+ +---------------+ +--------------+ +------------- + +""" diff --git a/apt_p2p/apt_p2p.py b/apt_p2p/apt_p2p.py new file mode 100644 index 0000000..9e360a0 --- /dev/null +++ b/apt_p2p/apt_p2p.py @@ -0,0 +1,369 @@ + +"""The main program code. 
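+
+This module ties together the DHT, the HTTP server, the peer download
+manager, and the cache and mirror managers (see the package docstring
+for a diagram of how they interact).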
+ +@var DHT_PIECES: the maximum number of pieces to store with our contact info + in the DHT +@var TORRENT_PIECES: the maximum number of pieces to store as a separate entry + in the DHT +@var download_dir: the name of the directory to use for downloaded files + +""" + +from binascii import b2a_hex +from urlparse import urlunparse +import os, re, sha + +from twisted.internet import defer, reactor +from twisted.web2 import server, http, http_headers, static +from twisted.python import log, failure +from twisted.python.filepath import FilePath + +from apt_p2p_conf import config +from PeerManager import PeerManager +from HTTPServer import TopLevel +from MirrorManager import MirrorManager +from CacheManager import CacheManager +from Hash import HashObject +from db import DB +from util import findMyIPAddr, compact + +DHT_PIECES = 4 +TORRENT_PIECES = 70 + +download_dir = 'cache' + +class AptP2P: + """The main code object that does all of the work. + + Contains all of the sub-components that do all the low-level work, and + coordinates communication between them. + + @type cache_dir: L{twisted.python.filepath.FilePath} + @ivar cache_dir: the directory to use for storing all files + @type db: L{db.DB} + @ivar db: the database to use for tracking files and hashes + @type dht: L{interfaces.IDHT} + @ivar dht: the DHT instance to use + @type http_server: L{HTTPServer.TopLevel} + @ivar http_server: the web server that will handle all requests from apt + and from other peers + @type peers: L{PeerManager.PeerManager} + @ivar peers: the manager of all downloads from mirrors and other peers + @type mirrors: L{MirrorManager.MirrorManager} + @ivar mirrors: the manager of downloaded information about mirrors which + can be queried to get hashes from file names + @type cache: L{CacheManager.CacheManager} + @ivar cache: the manager of all downloaded files + @type my_contact: C{string} + @ivar my_contact: the 6-byte compact peer representation of this peer's + download information (IP address and port) + """ + + def __init__(self, dht): + """Initialize all the sub-components. + + @type dht: L{interfaces.IDHT} + @param dht: the DHT instance to use + """ + log.msg('Initializing the main apt_p2p application') + self.cache_dir = FilePath(config.get('DEFAULT', 'cache_dir')) + if not self.cache_dir.child(download_dir).exists(): + self.cache_dir.child(download_dir).makedirs() + self.db = DB(self.cache_dir.child('apt-p2p.db')) + self.dht = dht + self.dht.loadConfig(config, config.get('DEFAULT', 'DHT')) + self.dht.join().addCallbacks(self.joinComplete, self.joinError) + self.http_server = TopLevel(self.cache_dir.child(download_dir), self.db, self) + self.getHTTPFactory = self.http_server.getHTTPFactory + self.peers = PeerManager() + self.mirrors = MirrorManager(self.cache_dir, config.gettime('DEFAULT', 'UNLOAD_PACKAGES_CACHE')) + other_dirs = [FilePath(f) for f in config.getstringlist('DEFAULT', 'OTHER_DIRS')] + self.cache = CacheManager(self.cache_dir.child(download_dir), self.db, other_dirs, self) + self.my_contact = None + + #{ DHT maintenance + def joinComplete(self, result): + """Complete the DHT join process and determine our download information. + + Called by the DHT when the join has been completed with information + on the external IP address and port of this peer. 
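+
+        The found address and port are then packed by L{util.compact} into
+        the 6-byte compact form used in the DHT, 4 IP bytes followed by a
+        2-byte big-endian port, for example::
+
+            compact('81.171.1.1', 9977)   # -> '\x51\xab\x01\x01\x26\xf9'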
+ """ + my_addr = findMyIPAddr(result, + config.getint(config.get('DEFAULT', 'DHT'), 'PORT'), + config.getboolean('DEFAULT', 'LOCAL_OK')) + if not my_addr: + raise RuntimeError, "IP address for this machine could not be found" + self.my_contact = compact(my_addr, config.getint('DEFAULT', 'PORT')) + self.cache.scanDirectories() + reactor.callLater(60, self.refreshFiles) + + def joinError(self, failure): + """Joining the DHT has failed.""" + log.msg("joining DHT failed miserably") + log.err(failure) + raise RuntimeError, "IP address for this machine could not be found" + + def refreshFiles(self): + """Refresh any files in the DHT that are about to expire.""" + expireAfter = config.gettime('DEFAULT', 'KEY_REFRESH') + hashes = self.db.expiredHashes(expireAfter) + if len(hashes.keys()) > 0: + log.msg('Refreshing the keys of %d DHT values' % len(hashes.keys())) + self._refreshFiles(None, hashes) + + def _refreshFiles(self, result, hashes): + if result is not None: + log.msg('Storage resulted in: %r' % result) + + if hashes: + raw_hash = hashes.keys()[0] + self.db.refreshHash(raw_hash) + hash = HashObject(raw_hash, pieces = hashes[raw_hash]['pieces']) + del hashes[raw_hash] + storeDefer = self.store(hash) + storeDefer.addBoth(self._refreshFiles, hashes) + else: + reactor.callLater(60, self.refreshFiles) + + #{ Main workflow + def check_freshness(self, req, url, modtime, resp): + """Send a HEAD to the mirror to check if the response from the cache is still valid. + + @type req: L{twisted.web2.http.Request} + @param req: the initial request sent to the HTTP server by apt + @param url: the URI of the actual mirror request + @type modtime: C{int} + @param modtime: the modified time of the cached file (seconds since epoch) + @type resp: L{twisted.web2.http.Response} + @param resp: the response from the cache to be sent to apt + @rtype: L{twisted.internet.defer.Deferred} + @return: a deferred that will be called back with the correct response + """ + log.msg('Checking if %s is still fresh' % url) + d = self.peers.get('', url, method = "HEAD", modtime = modtime) + d.addCallback(self.check_freshness_done, req, url, resp) + return d + + def check_freshness_done(self, resp, req, url, orig_resp): + """Process the returned response from the mirror. + + @type resp: L{twisted.web2.http.Response} + @param resp: the response from the mirror to the HEAD request + @type req: L{twisted.web2.http.Request} + @param req: the initial request sent to the HTTP server by apt + @param url: the URI of the actual mirror request + @type orig_resp: L{twisted.web2.http.Response} + @param orig_resp: the response from the cache to be sent to apt + """ + if resp.code == 304: + log.msg('Still fresh, returning: %s' % url) + return orig_resp + else: + log.msg('Stale, need to redownload: %s' % url) + return self.get_resp(req, url) + + def get_resp(self, req, url): + """Lookup a hash for the file in the local mirror info. + + Starts the process of getting a response to an uncached apt request. 
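+
+        The chain of helpers that can follow, all defined below, is roughly::
+
+            findHash_done -> getCachedFile -> lookupHash -> lookupHash_done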
+
+        @type req: L{twisted.web2.http.Request}
+        @param req: the initial request sent to the HTTP server by apt
+        @param url: the URI of the actual mirror request
+        @rtype: L{twisted.internet.defer.Deferred}
+        @return: a deferred that will be called back with the response
+        """
+        d = defer.Deferred()
+
+        log.msg('Trying to find hash for %s' % url)
+        findDefer = self.mirrors.findHash(url)
+
+        findDefer.addCallbacks(self.findHash_done, self.findHash_error,
+                               callbackArgs=(req, url, d), errbackArgs=(req, url, d))
+        findDefer.addErrback(log.err)
+        return d
+
+    def findHash_error(self, failure, req, url, d):
+        """Process the error in the hash lookup by returning an empty L{HashObject}."""
+        log.err(failure)
+        self.findHash_done(HashObject(), req, url, d)
+
+    def findHash_done(self, hash, req, url, d):
+        """Use the returned hash to look up the file in the cache.
+
+        If the hash was not found, the workflow skips down to downloading
+        from the mirror (L{lookupHash_done}).
+
+        @type hash: L{Hash.HashObject}
+        @param hash: the hash object containing the expected hash for the file
+        """
+        if hash.expected() is None:
+            log.msg('Hash for %s was not found' % url)
+            self.lookupHash_done([], hash, url, d)
+        else:
+            log.msg('Found hash %s for %s' % (hash.hexexpected(), url))
+
+            # Lookup hash in cache
+            locations = self.db.lookupHash(hash.expected(), filesOnly = True)
+            self.getCachedFile(hash, req, url, d, locations)
+
+    def getCachedFile(self, hash, req, url, d, locations):
+        """Try to return the file from the cache, otherwise move on to a DHT lookup.
+
+        @type locations: C{list} of C{dictionary}
+        @param locations: the files in the cache that match the hash,
+            the dictionary contains a key 'path' whose value is a
+            L{twisted.python.filepath.FilePath} object for the file.
+        """
+        if not locations:
+            log.msg('Failed to return file from cache: %s' % url)
+            self.lookupHash(hash, url, d)
+            return
+
+        # Get the first possible location from the list
+        file = locations.pop(0)['path']
+        log.msg('Returning cached file: %s' % file.path)
+
+        # Get its response
+        resp = static.File(file.path).renderHTTP(req)
+        if isinstance(resp, defer.Deferred):
+            resp.addBoth(self._getCachedFile, hash, req, url, d, locations)
+        else:
+            self._getCachedFile(resp, hash, req, url, d, locations)
+
+    def _getCachedFile(self, resp, hash, req, url, d, locations):
+        """Check the returned response to be sure it is valid."""
+        if isinstance(resp, failure.Failure):
+            log.msg('Got error trying to get cached file')
+            log.err(resp)
+            # Try the next possible location
+            self.getCachedFile(hash, req, url, d, locations)
+            return
+
+        log.msg('Cached response: %r' % resp)
+
+        if resp.code >= 200 and resp.code < 400:
+            d.callback(resp)
+        else:
+            # Try the next possible location
+            self.getCachedFile(hash, req, url, d, locations)
+
+    def lookupHash(self, hash, url, d):
+        """Look up the hash in the DHT."""
+        log.msg('Looking up hash in DHT for file: %s' % url)
+        key = hash.expected()
+        lookupDefer = self.dht.getValue(key)
+        lookupDefer.addCallback(self.lookupHash_done, hash, url, d)
+
+    def lookupHash_done(self, values, hash, url, d):
+        """Start the download of the file.
+
+        The download will be from peers if the DHT lookup succeeded, or
+        from the mirror otherwise.
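+
+        Each value is expected to be a dictionary like the one stored by
+        L{store} below, e.g. C{{'c': <6-byte compact peer info>}}.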
+
+        @type values: C{list} of C{dictionary}
+        @param values: the returned values from the DHT containing peer
+            download information
+        """
+        if not values:
+            log.msg('Peers for %s were not found' % url)
+            getDefer = self.peers.get(hash, url)
+            getDefer.addCallback(self.cache.save_file, hash, url)
+            getDefer.addErrback(self.cache.save_error, url)
+            getDefer.addCallbacks(d.callback, d.errback)
+        else:
+            log.msg('Found peers for %s: %r' % (url, values))
+            # Download from the found peers
+            getDefer = self.peers.get(hash, url, values)
+            getDefer.addCallback(self.check_response, hash, url)
+            getDefer.addCallback(self.cache.save_file, hash, url)
+            getDefer.addErrback(self.cache.save_error, url)
+            getDefer.addCallbacks(d.callback, d.errback)
+
+    def check_response(self, response, hash, url):
+        """Check the response from peers, and fall back to the mirror if it is not valid."""
+        if response.code < 200 or response.code >= 300:
+            log.msg('Download from peers failed, going to direct download: %s' % url)
+            getDefer = self.peers.get(hash, url)
+            return getDefer
+        return response
+
+    def new_cached_file(self, file_path, hash, new_hash, url = None, forceDHT = False):
+        """Add a newly cached file to the mirror info and/or the DHT.
+
+        If the file was downloaded, set url to the path it was downloaded for.
+        The file is not added to the DHT unless a hash was found for it
+        (though it is added anyway if forceDHT is True).
+
+        @type file_path: L{twisted.python.filepath.FilePath}
+        @param file_path: the location of the file in the local cache
+        @type hash: L{Hash.HashObject}
+        @param hash: the original (expected) hash object, which also contains
+            the hash of the downloaded file
+        @type new_hash: C{boolean}
+        @param new_hash: whether the hash was new to this peer, and so should
+            be added to the DHT
+        @type url: C{string}
+        @param url: the URI of the location of the file in the mirror
+            (optional, defaults to not adding the file to the mirror info)
+        @type forceDHT: C{boolean}
+        @param forceDHT: whether to force addition of the file to the DHT
+            even if the hash was not found in a mirror
+            (optional, defaults to False)
+        """
+        if url:
+            self.mirrors.updatedFile(url, file_path)
+
+        if self.my_contact and hash and new_hash and (hash.expected() is not None or forceDHT):
+            return self.store(hash)
+        return None
+
+    def store(self, hash):
+        """Add a key/value pair for the file to the DHT.
+
+        Sets the key and value from the hash information, and tries to add
+        it to the DHT.
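+
+        Depending on the number of piece hashes, they are stored directly
+        with the contact info (C{'t'}), referenced by their SHA1 hash
+        (C{'h'}), or flagged as available from our HTTP server (C{'l'}).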
+ """ + key = hash.digest() + value = {'c': self.my_contact} + pieces = hash.pieceDigests() + + # Determine how to store any piece data + if len(pieces) <= 1: + pass + elif len(pieces) <= DHT_PIECES: + # Short enough to be stored with our peer contact info + value['t'] = {'t': ''.join(pieces)} + elif len(pieces) <= TORRENT_PIECES: + # Short enough to be stored in a separate key in the DHT + s = sha.new().update(''.join(pieces)) + value['h'] = s.digest() + else: + # Too long, must be served up by our peer HTTP server + s = sha.new().update(''.join(pieces)) + value['l'] = s.digest() + + storeDefer = self.dht.storeValue(key, value) + storeDefer.addCallback(self.store_done, hash) + return storeDefer + + def store_done(self, result, hash): + """Add a key/value pair for the pieces of the file to the DHT (if necessary).""" + log.msg('Added %s to the DHT: %r' % (hash.hexdigest(), result)) + pieces = hash.pieceDigests() + if len(pieces) > DHT_PIECES and len(pieces) <= TORRENT_PIECES: + # Add the piece data key and value to the DHT + s = sha.new().update(''.join(pieces)) + key = s.digest() + value = {'t': ''.join(pieces)} + + storeDefer = self.dht.storeValue(key, value) + storeDefer.addCallback(self.store_torrent_done, key) + return storeDefer + return result + + def store_torrent_done(self, result, key): + """Adding the file to the DHT is complete, and so is the workflow.""" + log.msg('Added torrent string %s to the DHT: %r' % (b2ahex(key), result)) + return result + \ No newline at end of file diff --git a/apt_p2p/apt_p2p_conf.py b/apt_p2p/apt_p2p_conf.py new file mode 100644 index 0000000..aaf2013 --- /dev/null +++ b/apt_p2p/apt_p2p_conf.py @@ -0,0 +1,165 @@ + +"""Loading of configuration files and parameters. + +@type version: L{twisted.python.versions.Version} +@var version: the version of this program +@type DEFAULT_CONFIG_FILES: C{list} of C{string} +@var DEFAULT_CONFIG_FILES: the default config files to load (in order) +@var DEFAULTS: the default config parameter values for the main program +@var DHT_DEFAULTS: the default config parameter values for the default DHT + +""" + +import os, sys +from ConfigParser import SafeConfigParser + +from twisted.python import log, versions + +class ConfigError(Exception): + """Errors that occur in the loading of configuration variables.""" + def __init__(self, message): + self.message = message + def __str__(self): + return repr(self.message) + +version = versions.Version('apt-p2p', 0, 0, 0) + +# Set the home parameter +home = os.path.expandvars('${HOME}') +if home == '${HOME}' or not os.path.isdir(home): + home = os.path.expanduser('~') + if not os.path.isdir(home): + home = os.path.abspath(os.path.dirname(sys.argv[0])) + +DEFAULT_CONFIG_FILES=['/etc/apt-p2p/apt-p2p.conf', + home + '/.apt-p2p/apt-p2p.conf'] + +DEFAULTS = { + + # Port to listen on for all requests (TCP and UDP) + 'PORT': '9977', + + # Directory to store the downloaded files in + 'CACHE_DIR': home + '/.apt-p2p/cache', + + # Other directories containing packages to share with others + # WARNING: all files in these directories will be hashed and available + # for everybody to download + 'OTHER_DIRS': """""", + + # User name to try and run as + 'USERNAME': '', + + # Whether it's OK to use an IP addres from a known local/private range + 'LOCAL_OK': 'no', + + # Unload the packages cache after an interval of inactivity this long. + # The packages cache uses a lot of memory, and only takes a few seconds + # to reload when a new request arrives. 
+    'UNLOAD_PACKAGES_CACHE': '5m',
+
+    # Refresh the DHT keys after this much time has passed.
+    # This should be a time slightly less than the DHT's KEY_EXPIRE value.
+    'KEY_REFRESH': '57m',
+
+    # Which DHT implementation to use.
+    # It must be possible to do "from <DHT>.DHT import DHT" to get a class that
+    # implements the IDHT interface.
+    'DHT': 'apt_p2p_Khashmir',
+
+    # Whether to only run the DHT (for providing only a bootstrap node)
+    'DHT-ONLY': 'no',
+}
+
+DHT_DEFAULTS = {
+    # bootstrap nodes to contact to join the DHT
+    'BOOTSTRAP': """www.camrdale.org:9977
+                    steveholt.hopto.org:9976""",
+
+    # whether this node is a bootstrap node
+    'BOOTSTRAP_NODE': "no",
+
+    # Kademlia "K" constant, this should be an even number
+    'K': '8',
+
+    # SHA1 is 160 bits long
+    'HASH_LENGTH': '160',
+
+    # checkpoint every this many seconds
+    'CHECKPOINT_INTERVAL': '5m', # five minutes
+
+    ### SEARCHING/STORING
+    # concurrent number of calls per find node/value request!
+    'CONCURRENT_REQS': '4',
+
+    # how many hosts to post to
+    'STORE_REDUNDANCY': '3',
+
+    # How many values to attempt to retrieve from the DHT.
+    # Setting this to 0 will try and get all values (which could take a while if
+    # a lot of nodes have values). Setting it negative will try to get that
+    # number of results from only the closest STORE_REDUNDANCY nodes to the hash.
+    # The default is a large negative number so all values from the closest
+    # STORE_REDUNDANCY nodes will be retrieved.
+    'RETRIEVE_VALUES': '-10000',
+
+    ### ROUTING TABLE STUFF
+    # how many times in a row a node can fail to respond before it's booted from the routing table
+    'MAX_FAILURES': '3',
+
+    # never ping a node more often than this
+    'MIN_PING_INTERVAL': '15m', # fifteen minutes
+
+    # refresh buckets that haven't been touched in this long
+    'BUCKET_STALENESS': '1h', # one hour
+
+    # expire entries older than this
+    'KEY_EXPIRE': '1h', # 60 minutes
+
+    # whether to spew info about the requests/responses in the protocol
+    'SPEW': 'yes',
+}
+
+class AptP2PConfigParser(SafeConfigParser):
+    """Adds 'gettime' and 'getstringlist' to ConfigParser objects.
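+
+    For example, with the defaults above (a sketch, assuming the C{config}
+    instance created at the bottom of this module)::
+
+        config.gettime('DEFAULT', 'KEY_REFRESH')    # '57m' -> 3420 seconds
+        config.getstringlist(config.get('DEFAULT', 'DHT'), 'BOOTSTRAP')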
+
+    @ivar time_multipliers: the 'gettime' suffixes and the multipliers needed
+        to convert them to seconds
+    """
+
+    time_multipliers={
+        's': 1,    #seconds
+        'm': 60,   #minutes
+        'h': 3600, #hours
+        'd': 86400,#days
+        }
+
+    def gettime(self, section, option):
+        """Read the config parameter as a time value."""
+        mult = 1
+        value = self.get(section, option)
+        if len(value) == 0:
+            raise ConfigError("Configuration parse error: [%s] %s" % (section, option))
+        suffix = value[-1].lower()
+        if suffix in self.time_multipliers.keys():
+            mult = self.time_multipliers[suffix]
+            value = value[:-1]
+        return int(value)*mult
+
+    def getstring(self, section, option):
+        """Read the config parameter as a string."""
+        return self.get(section, option)
+
+    def getstringlist(self, section, option):
+        """Read the multi-line config parameter as a list of strings."""
+        return self.get(section, option).split()
+
+    def optionxform(self, option):
+        """Use all uppercase in the config parameter names."""
+        return option.upper()
+
+# Initialize the default config parameters
+config = AptP2PConfigParser(DEFAULTS)
+config.add_section(config.get('DEFAULT', 'DHT'))
+for k in DHT_DEFAULTS:
+    config.set(config.get('DEFAULT', 'DHT'), k, DHT_DEFAULTS[k])
diff --git a/apt_p2p/db.py b/apt_p2p/db.py
new file mode 100644
index 0000000..396f419
--- /dev/null
+++ b/apt_p2p/db.py
@@ -0,0 +1,421 @@
+
+"""An sqlite database for storing persistent files and hashes."""
+
+from datetime import datetime, timedelta
+from pysqlite2 import dbapi2 as sqlite
+from binascii import a2b_base64, b2a_base64
+from time import sleep
+import os, sha
+
+from twisted.python.filepath import FilePath
+from twisted.trial import unittest
+
+assert sqlite.version_info >= (2, 1)
+
+class DBExcept(Exception):
+    """An error occurred in accessing the database."""
+    pass
+
+class khash(str):
+    """Dummy class to convert all hashes to base64 for storing in the DB."""
+
+# Initialize the database to work with 'khash' objects (binary strings)
+sqlite.register_adapter(khash, b2a_base64)
+sqlite.register_converter("KHASH", a2b_base64)
+sqlite.register_converter("khash", a2b_base64)
+sqlite.enable_callback_tracebacks(True)
+
+class DB:
+    """An sqlite database for storing persistent files and hashes.
+
+    @type db: L{twisted.python.filepath.FilePath}
+    @ivar db: the database file to use
+    @type conn: L{pysqlite2.dbapi2.Connection}
+    @ivar conn: an open connection to the sqlite database
+    """
+
+    def __init__(self, db):
+        """Load or create the database file.
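+
+        A minimal usage sketch (the path is illustrative)::
+
+            db = DB(FilePath('/tmp/apt-p2p.db'))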
+
+        @type db: L{twisted.python.filepath.FilePath}
+        @param db: the database file to use
+        """
+        self.db = db
+        self.db.restat(False)
+        if self.db.exists():
+            self._loadDB()
+        else:
+            self._createNewDB()
+        self.conn.text_factory = str
+        self.conn.row_factory = sqlite.Row
+
+    def _loadDB(self):
+        """Open a new connection to the existing database file."""
+        try:
+            self.conn = sqlite.connect(database=self.db.path, detect_types=sqlite.PARSE_DECLTYPES)
+        except:
+            import traceback
+            raise DBExcept("Couldn't open DB: %s" % traceback.format_exc())
+
+    def _createNewDB(self):
+        """Open a connection to a new database and create the necessary tables."""
+        if not self.db.parent().exists():
+            self.db.parent().makedirs()
+        self.conn = sqlite.connect(database=self.db.path, detect_types=sqlite.PARSE_DECLTYPES)
+        c = self.conn.cursor()
+        c.execute("CREATE TABLE files (path TEXT PRIMARY KEY UNIQUE, hashID INTEGER, " +
+                  "size NUMBER, mtime NUMBER)")
+        c.execute("CREATE TABLE hashes (hashID INTEGER PRIMARY KEY AUTOINCREMENT, " +
+                  "hash KHASH UNIQUE, pieces KHASH, " +
+                  "piecehash KHASH, refreshed TIMESTAMP)")
+        c.execute("CREATE INDEX hashes_refreshed ON hashes(refreshed)")
+        c.execute("CREATE INDEX hashes_piecehash ON hashes(piecehash)")
+        c.close()
+        self.conn.commit()
+
+    def _removeChanged(self, file, row):
+        """If the file has changed or is missing, remove it from the DB.
+
+        @type file: L{twisted.python.filepath.FilePath}
+        @param file: the file to check
+        @type row: C{dictionary}-like object
+        @param row: contains the expected 'size' and 'mtime' of the file
+        @rtype: C{boolean}
+        @return: True if the file is unchanged, False if it is changed,
+            and None if it is missing
+        """
+        res = None
+        if row:
+            file.restat(False)
+            if file.exists():
+                # Compare the current with the expected file properties
+                res = (row['size'] == file.getsize() and row['mtime'] == file.getmtime())
+            if not res:
+                # Remove the file from the database
+                c = self.conn.cursor()
+                c.execute("DELETE FROM files WHERE path = ?", (file.path, ))
+                self.conn.commit()
+                c.close()
+        return res
+
+    def storeFile(self, file, hash, pieces = ''):
+        """Store or update a file in the database.
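+
+        If pieces are given, their SHA1 (the "piecehash") is stored as well,
+        so the piece string can later be found by L{lookupHash}.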
+
+        @type file: L{twisted.python.filepath.FilePath}
+        @param file: the file to check
+        @type hash: C{string}
+        @param hash: the hash of the file
+        @type pieces: C{string}
+        @param pieces: the concatenated list of the hashes of the pieces of
+            the file (optional, defaults to the empty string)
+        @return: True if the hash was not in the database before
+            (so it needs to be added to the DHT)
+        """
+        # Hash the pieces to get the piecehash
+        piecehash = ''
+        if pieces:
+            s = sha.new(pieces)
+            piecehash = s.digest()
+
+        # Check the database for the hash
+        c = self.conn.cursor()
+        c.execute("SELECT hashID, piecehash FROM hashes WHERE hash = ?", (khash(hash), ))
+        row = c.fetchone()
+        if row:
+            assert piecehash == row['piecehash']
+            new_hash = False
+            hashID = row['hashID']
+        else:
+            # Add the new hash to the database
+            c = self.conn.cursor()
+            c.execute("INSERT OR REPLACE INTO hashes (hash, pieces, piecehash, refreshed) VALUES (?, ?, ?, ?)",
+                      (khash(hash), khash(pieces), khash(piecehash), datetime.now()))
+            self.conn.commit()
+            new_hash = True
+            hashID = c.lastrowid
+
+        # Add the file to the database
+        file.restat()
+        c.execute("INSERT OR REPLACE INTO files (path, hashID, size, mtime) VALUES (?, ?, ?, ?)",
+                  (file.path, hashID, file.getsize(), file.getmtime()))
+        self.conn.commit()
+        c.close()
+
+        return new_hash
+
+    def getFile(self, file):
+        """Get a file from the database.
+
+        If it has changed or is missing, it is removed from the database.
+
+        @type file: L{twisted.python.filepath.FilePath}
+        @param file: the file to check
+        @return: dictionary of info for the file, False if changed, or
+            None if not in database or missing
+        """
+        c = self.conn.cursor()
+        c.execute("SELECT hash, size, mtime, pieces FROM files JOIN hashes USING (hashID) WHERE path = ?", (file.path, ))
+        row = c.fetchone()
+        res = None
+        if row:
+            res = self._removeChanged(file, row)
+            if res:
+                res = {}
+                res['hash'] = row['hash']
+                res['size'] = row['size']
+                res['pieces'] = row['pieces']
+        c.close()
+        return res
+
+    def lookupHash(self, hash, filesOnly = False):
+        """Find a file by hash in the database.
+
+        If any found files have changed or are missing, they are removed
+        from the database. If filesOnly is False then it will also look for
+        piece string hashes if no files can be found.
+
+        @return: list of dictionaries of info for the found files
+        """
+        # Try to find the hash in the files table
+        c = self.conn.cursor()
+        c.execute("SELECT path, size, mtime, refreshed, pieces FROM files JOIN hashes USING (hashID) WHERE hash = ?", (khash(hash), ))
+        row = c.fetchone()
+        files = []
+        while row:
+            # Save the file to the list of found files
+            file = FilePath(row['path'])
+            res = self._removeChanged(file, row)
+            if res:
+                res = {}
+                res['path'] = file
+                res['size'] = row['size']
+                res['refreshed'] = row['refreshed']
+                res['pieces'] = row['pieces']
+                files.append(res)
+            row = c.fetchone()
+
+        if not filesOnly and not files:
+            # No files were found, so check the piecehashes as well
+            c.execute("SELECT refreshed, pieces, piecehash FROM hashes WHERE piecehash = ?", (khash(hash), ))
+            row = c.fetchone()
+            if row:
+                res = {}
+                res['refreshed'] = row['refreshed']
+                res['pieces'] = row['pieces']
+                files.append(res)
+
+        c.close()
+        return files
+
+    def isUnchanged(self, file):
+        """Check if a file in the file system has changed.
+
+        If it has changed, it is removed from the database.
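+
+        The stored size and mtime are compared against the file's current
+        values (see L{_removeChanged}).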
+
+        @return: True if unchanged, False if changed, None if not in database
+        """
+        c = self.conn.cursor()
+        c.execute("SELECT size, mtime FROM files WHERE path = ?", (file.path, ))
+        row = c.fetchone()
+        return self._removeChanged(file, row)
+
+    def refreshHash(self, hash):
+        """Refresh the publishing time of a hash."""
+        c = self.conn.cursor()
+        c.execute("UPDATE hashes SET refreshed = ? WHERE hash = ?", (datetime.now(), khash(hash)))
+        # Commit so the new time is not lost when the connection closes
+        self.conn.commit()
+        c.close()
+
+    def expiredHashes(self, expireAfter):
+        """Find files that need refreshing after expireAfter seconds.
+
+        For each hash that needs refreshing, finds all the files with that hash.
+        If the file has changed or is missing, it is removed from the table.
+
+        @return: dictionary with keys the hashes, values a list of FilePaths
+        """
+        t = datetime.now() - timedelta(seconds=expireAfter)
+
+        # Find all the hashes that need refreshing
+        c = self.conn.cursor()
+        c.execute("SELECT hashID, hash, pieces FROM hashes WHERE refreshed < ?", (t, ))
+        row = c.fetchone()
+        expired = {}
+        while row:
+            res = expired.setdefault(row['hash'], {})
+            res['hashID'] = row['hashID']
+            res['hash'] = row['hash']
+            res['pieces'] = row['pieces']
+            row = c.fetchone()
+
+        # Make sure there are still valid files for each hash
+        for hash in expired.values():
+            valid = False
+            c.execute("SELECT path, size, mtime FROM files WHERE hashID = ?", (hash['hashID'], ))
+            row = c.fetchone()
+            while row:
+                res = self._removeChanged(FilePath(row['path']), row)
+                if res:
+                    valid = True
+                row = c.fetchone()
+            if not valid:
+                # Remove hashes for which no files are still available
+                del expired[hash['hash']]
+                c.execute("DELETE FROM hashes WHERE hashID = ?", (hash['hashID'], ))
+
+        self.conn.commit()
+        c.close()
+
+        return expired
+
+    def removeUntrackedFiles(self, dirs):
+        """Remove files that are no longer tracked by the program.
+
+        @type dirs: C{list} of L{twisted.python.filepath.FilePath}
+        @param dirs: a list of the directories that we are tracking
+        @return: list of files that were removed
+        """
+        assert len(dirs) >= 1
+
+        # Create a list of globs and an SQL statement for the directories
+        newdirs = []
+        sql = "WHERE"
+        for dir in dirs:
+            newdirs.append(dir.child('*').path)
+            sql += " path NOT GLOB ? AND"
+        sql = sql[:-4]
+
+        # Get a listing of all the files that will be removed
+        c = self.conn.cursor()
+        c.execute("SELECT path FROM files " + sql, newdirs)
+        row = c.fetchone()
+        removed = []
+        while row:
+            removed.append(FilePath(row['path']))
+            row = c.fetchone()
+
+        # Delete all the removed files from the database
+        if removed:
+            c.execute("DELETE FROM files " + sql, newdirs)
+            self.conn.commit()
+
+        return removed
+
+    def close(self):
+        """Close the database connection."""
+        self.conn.close()
+
+class TestDB(unittest.TestCase):
+    """Tests for the khashmir database."""
+
+    timeout = 5
+    db = FilePath('/tmp/khashmir.db')
+    hash = '\xca\xec\xb8\x0c\x00\xe7\x07\xf8~])\x8f\x9d\xe5_B\xff\x1a\xc4!'
+ directory = FilePath('/tmp/apt-p2p/') + file = FilePath('/tmp/apt-p2p/khashmir.test') + testfile = 'tmp/khashmir.test' + dirs = [FilePath('/tmp/apt-p2p/top1'), + FilePath('/tmp/apt-p2p/top2/sub1'), + FilePath('/tmp/apt-p2p/top2/sub2/')] + + def setUp(self): + if not self.file.parent().exists(): + self.file.parent().makedirs() + self.file.setContent('fgfhds') + self.file.touch() + self.store = DB(self.db) + self.store.storeFile(self.file, self.hash) + + def test_openExistingDB(self): + """Tests opening an existing database.""" + self.store.close() + self.store = None + sleep(1) + self.store = DB(self.db) + res = self.store.isUnchanged(self.file) + self.failUnless(res) + + def test_getFile(self): + """Tests retrieving a file from the database.""" + res = self.store.getFile(self.file) + self.failUnless(res) + self.failUnlessEqual(res['hash'], self.hash) + + def test_lookupHash(self): + """Tests looking up a hash in the database.""" + res = self.store.lookupHash(self.hash) + self.failUnless(res) + self.failUnlessEqual(len(res), 1) + self.failUnlessEqual(res[0]['path'].path, self.file.path) + + def test_isUnchanged(self): + """Tests checking if a file in the database is unchanged.""" + res = self.store.isUnchanged(self.file) + self.failUnless(res) + sleep(2) + self.file.touch() + res = self.store.isUnchanged(self.file) + self.failUnless(res == False) + res = self.store.isUnchanged(self.file) + self.failUnless(res is None) + + def test_expiry(self): + """Tests retrieving the files from the database that have expired.""" + res = self.store.expiredHashes(1) + self.failUnlessEqual(len(res.keys()), 0) + sleep(2) + res = self.store.expiredHashes(1) + self.failUnlessEqual(len(res.keys()), 1) + self.failUnlessEqual(res.keys()[0], self.hash) + self.store.refreshHash(self.hash) + res = self.store.expiredHashes(1) + self.failUnlessEqual(len(res.keys()), 0) + + def build_dirs(self): + for dir in self.dirs: + file = dir.preauthChild(self.testfile) + if not file.parent().exists(): + file.parent().makedirs() + file.setContent(file.path) + file.touch() + self.store.storeFile(file, self.hash) + + def test_multipleHashes(self): + """Tests looking up a hash with multiple files in the database.""" + self.build_dirs() + res = self.store.expiredHashes(1) + self.failUnlessEqual(len(res.keys()), 0) + res = self.store.lookupHash(self.hash) + self.failUnless(res) + self.failUnlessEqual(len(res), 4) + self.failUnlessEqual(res[0]['refreshed'], res[1]['refreshed']) + self.failUnlessEqual(res[0]['refreshed'], res[2]['refreshed']) + self.failUnlessEqual(res[0]['refreshed'], res[3]['refreshed']) + sleep(2) + res = self.store.expiredHashes(1) + self.failUnlessEqual(len(res.keys()), 1) + self.failUnlessEqual(res.keys()[0], self.hash) + self.store.refreshHash(self.hash) + res = self.store.expiredHashes(1) + self.failUnlessEqual(len(res.keys()), 0) + + def test_removeUntracked(self): + """Tests removing untracked files from the database.""" + self.build_dirs() + res = self.store.removeUntrackedFiles(self.dirs) + self.failUnlessEqual(len(res), 1, 'Got removed paths: %r' % res) + self.failUnlessEqual(res[0], self.file, 'Got removed paths: %r' % res) + res = self.store.removeUntrackedFiles(self.dirs) + self.failUnlessEqual(len(res), 0, 'Got removed paths: %r' % res) + res = self.store.removeUntrackedFiles(self.dirs[1:]) + self.failUnlessEqual(len(res), 1, 'Got removed paths: %r' % res) + self.failUnlessEqual(res[0], self.dirs[0].preauthChild(self.testfile), 'Got removed paths: %r' % res) + res = 
self.store.removeUntrackedFiles(self.dirs[:1])
+        self.failUnlessEqual(len(res), 2, 'Got removed paths: %r' % res)
+        self.failUnlessIn(self.dirs[1].preauthChild(self.testfile), res, 'Got removed paths: %r' % res)
+        self.failUnlessIn(self.dirs[2].preauthChild(self.testfile), res, 'Got removed paths: %r' % res)
+
+    def tearDown(self):
+        self.directory.remove()
+        self.store.close()
+        self.db.remove()
+
diff --git a/apt_p2p/interfaces.py b/apt_p2p/interfaces.py
new file mode 100644
index 0000000..b38de39
--- /dev/null
+++ b/apt_p2p/interfaces.py
@@ -0,0 +1,43 @@
+
+"""Some interfaces that are used by the apt-p2p classes."""
+
+from zope.interface import Interface
+
+class IDHT(Interface):
+    """An abstract interface for using a DHT implementation."""
+
+    def loadConfig(self, config, section):
+        """Load the DHT's configuration from a config parser.
+
+        @type config: C{SafeConfigParser}
+        @param config: the parsed configuration values
+        """
+
+    def join(self):
+        """Bootstrap the new DHT node into the DHT.
+
+        @rtype: C{Deferred}
+        @return: a deferred that will fire when the node has joined
+        """
+
+    def leave(self):
+        """Depart gracefully from the DHT.
+
+        @rtype: C{Deferred}
+        @return: a deferred that will fire when the node has left
+        """
+
+    def getValue(self, key):
+        """Get a value from the DHT for the specified key.
+
+        The length of the key may be adjusted for use with the DHT.
+
+        @rtype: C{Deferred}
+        @return: a deferred that will fire with the stored values
+        """
+
+    def storeValue(self, key, value):
+        """Store a value in the DHT for the specified key.
+
+        The length of the key may be adjusted for use with the DHT.
+        """
diff --git a/apt_p2p/policies.py b/apt_p2p/policies.py
new file mode 100644
index 0000000..e7bae81
--- /dev/null
+++ b/apt_p2p/policies.py
@@ -0,0 +1,702 @@
+# -*- test-case-name: twisted.test.test_policies -*-
+# Copyright (c) 2001-2007 Twisted Matrix Laboratories.
+# See LICENSE for details.
+
+
+"""
+Resource limiting policies.
+
+@seealso: See also L{twisted.protocols.htb} for rate limiting.
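+
+For example, an existing factory can be wrapped to limit bandwidth (a
+sketch; the limits shown are illustrative)::
+
+    factory = ThrottlingFactory(factory, readLimit=100*1024, writeLimit=100*1024)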
+""" + +# system imports +import sys, operator + +# twisted imports +from twisted.internet.protocol import ServerFactory, Protocol, ClientFactory +from twisted.internet import reactor, error +from twisted.python import log +from zope.interface import providedBy, directlyProvides + + +class ProtocolWrapper(Protocol): + """Wraps protocol instances and acts as their transport as well.""" + + disconnecting = 0 + + def __init__(self, factory, wrappedProtocol): + self.wrappedProtocol = wrappedProtocol + self.factory = factory + + def makeConnection(self, transport): + directlyProvides(self, *providedBy(self) + providedBy(transport)) + Protocol.makeConnection(self, transport) + + # Transport relaying + + def write(self, data): + self.transport.write(data) + + def writeSequence(self, data): + self.transport.writeSequence(data) + + def loseConnection(self): + self.disconnecting = 1 + self.transport.loseConnection() + + def getPeer(self): + return self.transport.getPeer() + + def getHost(self): + return self.transport.getHost() + + def registerProducer(self, producer, streaming): + self.transport.registerProducer(producer, streaming) + + def unregisterProducer(self): + self.transport.unregisterProducer() + + def stopConsuming(self): + self.transport.stopConsuming() + + def __getattr__(self, name): + return getattr(self.transport, name) + + # Protocol relaying + + def connectionMade(self): + self.factory.registerProtocol(self) + self.wrappedProtocol.makeConnection(self) + + def dataReceived(self, data): + self.wrappedProtocol.dataReceived(data) + + def connectionLost(self, reason): + self.factory.unregisterProtocol(self) + self.wrappedProtocol.connectionLost(reason) + + +class WrappingFactory(ClientFactory): + """Wraps a factory and its protocols, and keeps track of them.""" + + protocol = ProtocolWrapper + + def __init__(self, wrappedFactory): + self.wrappedFactory = wrappedFactory + self.protocols = {} + + def doStart(self): + self.wrappedFactory.doStart() + ClientFactory.doStart(self) + + def doStop(self): + self.wrappedFactory.doStop() + ClientFactory.doStop(self) + + def startedConnecting(self, connector): + self.wrappedFactory.startedConnecting(connector) + + def clientConnectionFailed(self, connector, reason): + self.wrappedFactory.clientConnectionFailed(connector, reason) + + def clientConnectionLost(self, connector, reason): + self.wrappedFactory.clientConnectionLost(connector, reason) + + def buildProtocol(self, addr): + return self.protocol(self, self.wrappedFactory.buildProtocol(addr)) + + def registerProtocol(self, p): + """Called by protocol to register itself.""" + self.protocols[p] = 1 + + def unregisterProtocol(self, p): + """Called by protocols when they go away.""" + del self.protocols[p] + + +class ThrottlingProtocol(ProtocolWrapper): + """Protocol for ThrottlingFactory.""" + + # wrap API for tracking bandwidth + + def __init__(self, factory, wrappedProtocol): + ProtocolWrapper.__init__(self, factory, wrappedProtocol) + self._tempDataBuffer = [] + self._tempDataLength = 0 + self.throttled = False + + def write(self, data): + # Check if we can write + if not self.throttled: + paused = self.factory.registerWritten(len(data)) + if not paused: + ProtocolWrapper.write(self, data) + + if paused is not None and hasattr(self, "producer") and self.producer and not self.producer.paused: + # Interrupt the flow so that others can can have a chance + # We can only do this if it's not already paused otherwise we + # risk unpausing something that the Server paused + 
self.producer.pauseProducing() + reactor.callLater(0, self.producer.resumeProducing) + + if self.throttled or paused: + # Can't write, buffer the data + self._tempDataBuffer.append(data) + self._tempDataLength += len(data) + self._throttleWrites() + + def writeSequence(self, seq): + if not self.throttled: + # Write each sequence separately + while seq and not self.factory.registerWritten(len(seq[0])): + ProtocolWrapper.write(self, seq.pop(0)) + + # If there's some left, we must have been paused + if seq: + self._tempDataBuffer.extend(seq) + self._tempDataLength += reduce(operator.add, map(len, seq)) + self._throttleWrites() + + def dataReceived(self, data): + self.factory.registerRead(len(data)) + ProtocolWrapper.dataReceived(self, data) + + def registerProducer(self, producer, streaming): + assert streaming, "You can only use the ThrottlingProtocol with streaming (push) producers." + self.producer = producer + ProtocolWrapper.registerProducer(self, producer, streaming) + + def unregisterProducer(self): + del self.producer + ProtocolWrapper.unregisterProducer(self) + + + def throttleReads(self): + self.transport.pauseProducing() + + def unthrottleReads(self): + self.transport.resumeProducing() + + def _throttleWrites(self): + # If we haven't yet, queue for unthrottling + if not self.throttled: + self.throttled = True + self.factory.throttledWrites(self) + + if hasattr(self, "producer") and self.producer: + self.producer.pauseProducing() + + def unthrottleWrites(self): + # Write some data + if self._tempDataBuffer: + assert not self.factory.registerWritten(len(self._tempDataBuffer[0])) + self._tempDataLength -= len(self._tempDataBuffer[0]) + ProtocolWrapper.write(self, self._tempDataBuffer.pop(0)) + assert self._tempDataLength >= 0 + + # If we wrote it all, start producing more + if not self._tempDataBuffer: + assert self._tempDataLength == 0 + self.throttled = False + if hasattr(self, "producer") and self.producer: + # This might unpause something the Server has also paused, but + # it will get paused again on first write anyway + reactor.callLater(0, self.producer.resumeProducing) + + return self._tempDataLength + + +class ThrottlingFactory(WrappingFactory): + """ + Throttles bandwidth and number of connections. + + Write bandwidth will only be throttled if there is a producer + registered. + """ + + protocol = ThrottlingProtocol + CHUNK_SIZE = 4*1024 + + def __init__(self, wrappedFactory, maxConnectionCount=sys.maxint, + readLimit=None, writeLimit=None): + WrappingFactory.__init__(self, wrappedFactory) + self.connectionCount = 0 + self.maxConnectionCount = maxConnectionCount + self.readLimit = readLimit # max bytes we should read per second + self.writeLimit = writeLimit # max bytes we should write per second + self.readThisSecond = 0 + self.writeAvailable = writeLimit + self._writeQueue = [] + self.unthrottleReadsID = None + self.checkReadBandwidthID = None + self.unthrottleWritesID = None + self.checkWriteBandwidthID = None + + + def callLater(self, period, func): + """ + Wrapper around L{reactor.callLater} for test purpose. + """ + return reactor.callLater(period, func) + + + def registerWritten(self, length): + """ + Called by protocol to tell us more bytes were written. + Returns True if the bytes could not be written and the protocol should pause itself. 
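+
+        (None is returned when no write limit is configured, and False when
+        the bytes fit within this second's write allowance.)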
+ """ + # Check if there are bytes available to write + if self.writeLimit is None: + return None + elif self.writeAvailable > 0: + self.writeAvailable -= length + return False + + return True + + + def throttledWrites(self, p): + """ + Called by the protocol to queue it for later writing. + """ + assert p not in self._writeQueue + self._writeQueue.append(p) + + + def registerRead(self, length): + """ + Called by protocol to tell us more bytes were read. + """ + self.readThisSecond += length + + + def checkReadBandwidth(self): + """ + Checks if we've passed bandwidth limits. + """ + if self.readThisSecond > self.readLimit: + self.throttleReads() + throttleTime = (float(self.readThisSecond) / self.readLimit) - 1.0 + self.unthrottleReadsID = self.callLater(throttleTime, + self.unthrottleReads) + self.readThisSecond = 0 + self.checkReadBandwidthID = self.callLater(1, self.checkReadBandwidth) + + + def checkWriteBandwidth(self): + """ + Add some new available bandwidth, and check for protocols to unthrottle. + """ + # Increase the available write bytes, but not higher than the limit + self.writeAvailable = min(self.writeLimit, self.writeAvailable + self.writeLimit) + + # Write from the queue until it's empty or we're throttled again + while self.writeAvailable > 0 and self._writeQueue: + # Get the first queued protocol + p = self._writeQueue.pop(0) + _tempWriteAvailable = self.writeAvailable + bytesLeft = 1 + + # Unthrottle writes until CHUNK_SIZE is reached or the protocol is unbuffered + while self.writeAvailable > 0 and _tempWriteAvailable - self.writeAvailable < self.CHUNK_SIZE and bytesLeft > 0: + # Unthrottle a single write (from the protocol's buffer) + bytesLeft = p.unthrottleWrites() + + # If the protocol is not done, requeue it + if bytesLeft > 0: + self._writeQueue.append(p) + + self.checkWriteBandwidthID = self.callLater(1, self.checkWriteBandwidth) + + + def throttleReads(self): + """ + Throttle reads on all protocols. + """ + log.msg("Throttling reads on %s" % self) + for p in self.protocols.keys(): + p.throttleReads() + + + def unthrottleReads(self): + """ + Stop throttling reads on all protocols. 
+ """ + self.unthrottleReadsID = None + log.msg("Stopped throttling reads on %s" % self) + for p in self.protocols.keys(): + p.unthrottleReads() + + + def buildProtocol(self, addr): + if self.connectionCount == 0: + if self.readLimit is not None: + self.checkReadBandwidth() + if self.writeLimit is not None: + self.checkWriteBandwidth() + + if self.connectionCount < self.maxConnectionCount: + self.connectionCount += 1 + return WrappingFactory.buildProtocol(self, addr) + else: + log.msg("Max connection count reached!") + return None + + + def unregisterProtocol(self, p): + WrappingFactory.unregisterProtocol(self, p) + self.connectionCount -= 1 + if self.connectionCount == 0: + if self.unthrottleReadsID is not None: + self.unthrottleReadsID.cancel() + if self.checkReadBandwidthID is not None: + self.checkReadBandwidthID.cancel() + if self.unthrottleWritesID is not None: + self.unthrottleWritesID.cancel() + if self.checkWriteBandwidthID is not None: + self.checkWriteBandwidthID.cancel() + + + +class SpewingProtocol(ProtocolWrapper): + def dataReceived(self, data): + log.msg("Received: %r" % data) + ProtocolWrapper.dataReceived(self,data) + + def write(self, data): + log.msg("Sending: %r" % data) + ProtocolWrapper.write(self,data) + + + +class SpewingFactory(WrappingFactory): + protocol = SpewingProtocol + + + +class LimitConnectionsByPeer(WrappingFactory): + """Stability: Unstable""" + + maxConnectionsPerPeer = 5 + + def startFactory(self): + self.peerConnections = {} + + def buildProtocol(self, addr): + peerHost = addr[0] + connectionCount = self.peerConnections.get(peerHost, 0) + if connectionCount >= self.maxConnectionsPerPeer: + return None + self.peerConnections[peerHost] = connectionCount + 1 + return WrappingFactory.buildProtocol(self, addr) + + def unregisterProtocol(self, p): + peerHost = p.getPeer()[1] + self.peerConnections[peerHost] -= 1 + if self.peerConnections[peerHost] == 0: + del self.peerConnections[peerHost] + + +class LimitTotalConnectionsFactory(ServerFactory): + """Factory that limits the number of simultaneous connections. + + API Stability: Unstable + + @type connectionCount: C{int} + @ivar connectionCount: number of current connections. + @type connectionLimit: C{int} or C{None} + @cvar connectionLimit: maximum number of connections. + @type overflowProtocol: L{Protocol} or C{None} + @cvar overflowProtocol: Protocol to use for new connections when + connectionLimit is exceeded. If C{None} (the default value), excess + connections will be closed immediately. + """ + connectionCount = 0 + connectionLimit = None + overflowProtocol = None + + def buildProtocol(self, addr): + if (self.connectionLimit is None or + self.connectionCount < self.connectionLimit): + # Build the normal protocol + wrappedProtocol = self.protocol() + elif self.overflowProtocol is None: + # Just drop the connection + return None + else: + # Too many connections, so build the overflow protocol + wrappedProtocol = self.overflowProtocol() + + wrappedProtocol.factory = self + protocol = ProtocolWrapper(self, wrappedProtocol) + self.connectionCount += 1 + return protocol + + def registerProtocol(self, p): + pass + + def unregisterProtocol(self, p): + self.connectionCount -= 1 + + + +class TimeoutProtocol(ProtocolWrapper): + """ + Protocol that automatically disconnects when the connection is idle. + + Stability: Unstable + """ + + def __init__(self, factory, wrappedProtocol, timeoutPeriod): + """ + Constructor. + + @param factory: An L{IFactory}. + @param wrappedProtocol: A L{Protocol} to wrapp. 
+ @param timeoutPeriod: Number of seconds to wait for activity before + timing out. + """ + ProtocolWrapper.__init__(self, factory, wrappedProtocol) + self.timeoutCall = None + self.setTimeout(timeoutPeriod) + + + def setTimeout(self, timeoutPeriod=None): + """ + Set a timeout. + + This will cancel any existing timeouts. + + @param timeoutPeriod: If not C{None}, change the timeout period. + Otherwise, use the existing value. + """ + self.cancelTimeout() + if timeoutPeriod is not None: + self.timeoutPeriod = timeoutPeriod + self.timeoutCall = self.factory.callLater(self.timeoutPeriod, self.timeoutFunc) + + + def cancelTimeout(self): + """ + Cancel the timeout. + + If the timeout was already cancelled, this does nothing. + """ + if self.timeoutCall: + try: + self.timeoutCall.cancel() + except error.AlreadyCalled: + pass + self.timeoutCall = None + + + def resetTimeout(self): + """ + Reset the timeout, usually because some activity just happened. + """ + if self.timeoutCall: + self.timeoutCall.reset(self.timeoutPeriod) + + + def write(self, data): + self.resetTimeout() + ProtocolWrapper.write(self, data) + + + def writeSequence(self, seq): + self.resetTimeout() + ProtocolWrapper.writeSequence(self, seq) + + + def dataReceived(self, data): + self.resetTimeout() + ProtocolWrapper.dataReceived(self, data) + + + def connectionLost(self, reason): + self.cancelTimeout() + ProtocolWrapper.connectionLost(self, reason) + + + def timeoutFunc(self): + """ + This method is called when the timeout is triggered. + + By default it calls L{loseConnection}. Override this if you want + something else to happen. + """ + self.loseConnection() + + + +class TimeoutFactory(WrappingFactory): + """ + Factory for TimeoutWrapper. + + Stability: Unstable + """ + protocol = TimeoutProtocol + + + def __init__(self, wrappedFactory, timeoutPeriod=30*60): + self.timeoutPeriod = timeoutPeriod + WrappingFactory.__init__(self, wrappedFactory) + + + def buildProtocol(self, addr): + return self.protocol(self, self.wrappedFactory.buildProtocol(addr), + timeoutPeriod=self.timeoutPeriod) + + + def callLater(self, period, func): + """ + Wrapper around L{reactor.callLater} for test purpose. + """ + return reactor.callLater(period, func) + + + +class TrafficLoggingProtocol(ProtocolWrapper): + + def __init__(self, factory, wrappedProtocol, logfile, lengthLimit=None, + number=0): + """ + @param factory: factory which created this protocol. + @type factory: C{protocol.Factory}. + @param wrappedProtocol: the underlying protocol. + @type wrappedProtocol: C{protocol.Protocol}. + @param logfile: file opened for writing used to write log messages. + @type logfile: C{file} + @param lengthLimit: maximum size of the datareceived logged. + @type lengthLimit: C{int} + @param number: identifier of the connection. + @type number: C{int}. + """ + ProtocolWrapper.__init__(self, factory, wrappedProtocol) + self.logfile = logfile + self.lengthLimit = lengthLimit + self._number = number + + + def _log(self, line): + self.logfile.write(line + '\n') + self.logfile.flush() + + + def _mungeData(self, data): + if self.lengthLimit and len(data) > self.lengthLimit: + data = data[:self.lengthLimit - 12] + '<... 
elided>' + return data + + + # IProtocol + def connectionMade(self): + self._log('*') + return ProtocolWrapper.connectionMade(self) + + + def dataReceived(self, data): + self._log('C %d: %r' % (self._number, self._mungeData(data))) + return ProtocolWrapper.dataReceived(self, data) + + + def connectionLost(self, reason): + self._log('C %d: %r' % (self._number, reason)) + return ProtocolWrapper.connectionLost(self, reason) + + + # ITransport + def write(self, data): + self._log('S %d: %r' % (self._number, self._mungeData(data))) + return ProtocolWrapper.write(self, data) + + + def writeSequence(self, iovec): + self._log('SV %d: %r' % (self._number, [self._mungeData(d) for d in iovec])) + return ProtocolWrapper.writeSequence(self, iovec) + + + def loseConnection(self): + self._log('S %d: *' % (self._number,)) + return ProtocolWrapper.loseConnection(self) + + + +class TrafficLoggingFactory(WrappingFactory): + protocol = TrafficLoggingProtocol + + _counter = 0 + + def __init__(self, wrappedFactory, logfilePrefix, lengthLimit=None): + self.logfilePrefix = logfilePrefix + self.lengthLimit = lengthLimit + WrappingFactory.__init__(self, wrappedFactory) + + + def open(self, name): + return file(name, 'w') + + + def buildProtocol(self, addr): + self._counter += 1 + logfile = self.open(self.logfilePrefix + '-' + str(self._counter)) + return self.protocol(self, self.wrappedFactory.buildProtocol(addr), + logfile, self.lengthLimit, self._counter) + + + def resetCounter(self): + """ + Reset the value of the counter used to identify connections. + """ + self._counter = 0 + + + +class TimeoutMixin: + """Mixin for protocols which wish to timeout connections + + @cvar timeOut: The number of seconds after which to timeout the connection. + """ + timeOut = None + + __timeoutCall = None + + def callLater(self, period, func): + return reactor.callLater(period, func) + + + def resetTimeout(self): + """Reset the timeout count down""" + if self.__timeoutCall is not None and self.timeOut is not None: + self.__timeoutCall.reset(self.timeOut) + + def setTimeout(self, period): + """Change the timeout period + + @type period: C{int} or C{NoneType} + @param period: The period, in seconds, to change the timeout to, or + C{None} to disable the timeout. + """ + prev = self.timeOut + self.timeOut = period + + if self.__timeoutCall is not None: + if period is None: + self.__timeoutCall.cancel() + self.__timeoutCall = None + else: + self.__timeoutCall.reset(period) + elif period is not None: + self.__timeoutCall = self.callLater(period, self.__timedOut) + + return prev + + def __timedOut(self): + self.__timeoutCall = None + self.timeoutConnection() + + def timeoutConnection(self): + """Called when the connection times out. + Override to define behavior other than dropping the connection. + """ + self.transport.loseConnection() diff --git a/apt_p2p/util.py b/apt_p2p/util.py new file mode 100644 index 0000000..c334d1d --- /dev/null +++ b/apt_p2p/util.py @@ -0,0 +1,167 @@ + +"""Some utitlity functions for use in the apt-p2p program. 
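+
+The module also provides C{compact()} and C{uncompact()} for the 6-byte
+peer contact representation used in the DHT, e.g.::
+
+    compact('165.234.1.34', 61234)   # 4 IP bytes + 2 big-endian port bytes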
+
+@var isLocal: a compiled regular expression suitable for testing if an
+    IP address is from a known local or private range
+"""
+
+import os, re
+
+from twisted.python import log
+from twisted.trial import unittest
+
+isLocal = re.compile('^(192\.168\.[0-9]{1,3}\.[0-9]{1,3})|'+
+                     '(10\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})|'+
+                     '(172\.0?(1[6-9]|2[0-9]|3[0-1])\.[0-9]{1,3}\.[0-9]{1,3})|'+
+                     '(127\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})$')
+
+def findMyIPAddr(addrs, intended_port, local_ok = False):
+    """Find the best IP address to use from a list of possibilities.
+
+    @param addrs: the list of possible IP addresses
+    @param intended_port: the port that was supposed to be used
+    @param local_ok: whether known local/private IP ranges are allowed
+        (defaults to False)
+    @return: the preferred IP address, or None if one couldn't be found
+    """
+    log.msg("got addrs: %r" % (addrs,))
+    my_addr = None
+
+    # Try to find an address using the ifconfig program
+    try:
+        ifconfig = os.popen("/sbin/ifconfig |/bin/grep inet|"+
+                            "/usr/bin/awk '{print $2}' | "+
+                            "sed -e s/.*://", "r").read().strip().split('\n')
+    except:
+        ifconfig = []
+
+    # Get counts for all the non-local addresses returned from ifconfig
+    addr_count = {}
+    for addr in ifconfig:
+        if local_ok or not isLocal.match(addr):
+            addr_count.setdefault(addr, 0)
+            addr_count[addr] += 1
+
+    # If only one was found, use it as a starting point
+    local_addrs = addr_count.keys()
+    if len(local_addrs) == 1:
+        my_addr = local_addrs[0]
+        log.msg('Found remote address from ifconfig: %r' % (my_addr,))
+
+    # Get counts for all the non-local addresses returned from the DHT
+    addr_count = {}
+    port_count = {}
+    for addr in addrs:
+        if local_ok or not isLocal.match(addr[0]):
+            addr_count.setdefault(addr[0], 0)
+            addr_count[addr[0]] += 1
+            port_count.setdefault(addr[1], 0)
+            port_count[addr[1]] += 1
+
+    # Find the most popular address
+    popular_addr = []
+    popular_count = 0
+    for addr in addr_count:
+        if addr_count[addr] > popular_count:
+            popular_addr = [addr]
+            popular_count = addr_count[addr]
+        elif addr_count[addr] == popular_count:
+            popular_addr.append(addr)
+
+    # Find the most popular port
+    popular_port = []
+    popular_count = 0
+    for port in port_count:
+        if port_count[port] > popular_count:
+            popular_port = [port]
+            popular_count = port_count[port]
+        elif port_count[port] == popular_count:
+            popular_port.append(port)
+
+    # Check to make sure the port isn't being changed
+    port = intended_port
+    if len(port_count.keys()) > 1:
+        log.msg('Problem, multiple ports have been found: %r' % (port_count,))
+        if port not in port_count.keys():
+            log.msg('And none of the ports found match the intended one')
+    elif len(port_count.keys()) == 1:
+        port = port_count.keys()[0]
+    else:
+        log.msg('Port was not found')
+
+    # If one is popular, use that address
+    if len(popular_addr) == 1:
+        log.msg('Found popular address: %r' % (popular_addr[0],))
+        if my_addr and my_addr != popular_addr[0]:
+            log.msg('But the popular address does not match: %s != %s' % (popular_addr[0], my_addr))
+        my_addr = popular_addr[0]
+    elif len(popular_addr) > 1:
+        log.msg('Found multiple popular addresses: %r' % (popular_addr,))
+        if my_addr and my_addr not in popular_addr:
+            log.msg('And none of the addresses found match the ifconfig one')
+    else:
+        log.msg('No non-local addresses found: %r' % (popular_addr,))
+
+    if not my_addr:
+        log.msg("Remote IP address could not be found for this machine")
+
+    return my_addr
+
+def ipAddrFromChicken():
+    """Retrieve a possible IP address from the ipchicken.com website."""
+    import urllib
+    ip_search = re.compile('\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
+    try:
+        f = urllib.urlopen("http://www.ipchicken.com")
+        data = f.read()
+        f.close()
+        current_ip = ip_search.findall(data)
+        return current_ip
+    except Exception:
+        return []
+
+def uncompact(s):
+    """Extract the contact info from a compact peer representation.
+
+    @type s: C{string}
+    @param s: the compact representation
+    @rtype: (C{string}, C{int})
+    @return: the IP address and port number to contact the peer on
+    @raise ValueError: if the string is not a valid compact representation
+    """
+    if (len(s) != 6):
+        raise ValueError
+    ip = '.'.join([str(ord(i)) for i in s[0:4]])
+    port = (ord(s[4]) << 8) | ord(s[5])
+    return (ip, port)
+
+def compact(ip, port):
+    """Create a compact representation of peer contact info.
+
+    @type ip: C{string}
+    @param ip: the IP address of the peer
+    @type port: C{int}
+    @param port: the port number to contact the peer on
+    @rtype: C{string}
+    @return: the compact representation
+    @raise ValueError: if the IP address or port can not be compacted
+    """
+
+    s = ''.join([chr(int(i)) for i in ip.split('.')]) + \
+        chr((port & 0xFF00) >> 8) + chr(port & 0xFF)
+    if len(s) != 6:
+        raise ValueError
+    return s
+
+class TestUtil(unittest.TestCase):
+    """Tests for the utilities."""
+
+    timeout = 5
+    ip = '165.234.1.34'
+    port = 61234
+
+    def test_compact(self):
+        """Make sure compacting is reversed correctly by uncompacting."""
+        d = uncompact(compact(self.ip, self.port))
+        self.failUnlessEqual(d[0], self.ip)
+        self.failUnlessEqual(d[1], self.port)
diff --git a/apt_p2p_Khashmir/DHT.py b/apt_p2p_Khashmir/DHT.py
new file mode 100644
index 0000000..399babf
--- /dev/null
+++ b/apt_p2p_Khashmir/DHT.py
@@ -0,0 +1,454 @@
+
+"""The main interface to the Khashmir DHT.
+
+@var khashmir_dir: the name of the directory to use for DHT files
+"""
+
+from datetime import datetime
+import os, sha, random
+
+from twisted.internet import defer, reactor
+from twisted.internet.abstract import isIPAddress
+from twisted.python import log
+from twisted.trial import unittest
+from zope.interface import implements
+
+from apt_p2p.interfaces import IDHT
+from khashmir import Khashmir
+from bencode import bencode, bdecode
+
+khashmir_dir = 'apt-p2p-Khashmir'
+
+class DHTError(Exception):
+    """Represents errors that occur in the DHT."""
+
+class DHT:
+    """The main interface instance to the Khashmir DHT.
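+
+    A typical lifecycle (a sketch only; error handling omitted) is to call
+    L{loadConfig} and then L{join}, use L{getValue} and L{storeValue} once
+    the deferred returned by L{join} fires, and call L{leave} on shutdown::
+
+      myDHT.loadConfig(config, section)
+      myDHT.join().addCallback(joined)
+
+    where C{joined} is a hypothetical application callback.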
+ + @type config: C{dictionary} + @ivar config: the DHT configuration values + @type cache_dir: C{string} + @ivar cache_dir: the directory to use for storing files + @type bootstrap: C{list} of C{string} + @ivar bootstrap: the nodes to contact to bootstrap into the system + @type bootstrap_node: C{boolean} + @ivar bootstrap_node: whether this node is a bootstrap node + @type joining: L{twisted.internet.defer.Deferred} + @ivar joining: if a join is underway, the deferred that will signal it's end + @type joined: C{boolean} + @ivar joined: whether the DHT network has been successfully joined + @type outstandingJoins: C{int} + @ivar outstandingJoins: the number of bootstrap nodes that have yet to respond + @type foundAddrs: C{list} of (C{string}, C{int}) + @ivar foundAddrs: the IP address an port that were returned by bootstrap nodes + @type storing: C{dictionary} + @ivar storing: keys are keys for which store requests are active, values + are dictionaries with keys the values being stored and values the + deferred to call when complete + @type retrieving: C{dictionary} + @ivar retrieving: keys are the keys for which getValue requests are active, + values are lists of the deferreds waiting for the requests + @type retrieved: C{dictionary} + @ivar retrieved: keys are the keys for which getValue requests are active, + values are list of the values returned so far + @type config_parser: L{apt_p2p.apt_p2p_conf.AptP2PConfigParser} + @ivar config_parser: the configuration info for the main program + @type section: C{string} + @ivar section: the section of the configuration info that applies to the DHT + @type khashmir: L{khashmir.Khashmir} + @ivar khashmir: the khashmir DHT instance to use + """ + + implements(IDHT) + + def __init__(self): + """Initialize the DHT.""" + self.config = None + self.cache_dir = '' + self.bootstrap = [] + self.bootstrap_node = False + self.joining = None + self.joined = False + self.outstandingJoins = 0 + self.foundAddrs = [] + self.storing = {} + self.retrieving = {} + self.retrieved = {} + + def loadConfig(self, config, section): + """See L{apt_p2p.interfaces.IDHT}.""" + self.config_parser = config + self.section = section + self.config = {} + + # Get some initial values + self.cache_dir = os.path.join(self.config_parser.get(section, 'cache_dir'), khashmir_dir) + if not os.path.exists(self.cache_dir): + os.makedirs(self.cache_dir) + self.bootstrap = self.config_parser.getstringlist(section, 'BOOTSTRAP') + self.bootstrap_node = self.config_parser.getboolean(section, 'BOOTSTRAP_NODE') + for k in self.config_parser.options(section): + # The numbers in the config file + if k in ['K', 'HASH_LENGTH', 'CONCURRENT_REQS', 'STORE_REDUNDANCY', + 'RETRIEVE_VALUES', 'MAX_FAILURES', 'PORT']: + self.config[k] = self.config_parser.getint(section, k) + # The times in the config file + elif k in ['CHECKPOINT_INTERVAL', 'MIN_PING_INTERVAL', + 'BUCKET_STALENESS', 'KEY_EXPIRE']: + self.config[k] = self.config_parser.gettime(section, k) + # The booleans in the config file + elif k in ['SPEW']: + self.config[k] = self.config_parser.getboolean(section, k) + # Everything else is a string + else: + self.config[k] = self.config_parser.get(section, k) + + def join(self): + """See L{apt_p2p.interfaces.IDHT}.""" + if self.config is None: + raise DHTError, "configuration not loaded" + if self.joining: + raise DHTError, "a join is already in progress" + + # Create the new khashmir instance + self.khashmir = Khashmir(self.config, self.cache_dir) + + self.joining = defer.Deferred() + for node in 
self.bootstrap: + host, port = node.rsplit(':', 1) + port = int(port) + + # Translate host names into IP addresses + if isIPAddress(host): + self._join_gotIP(host, port) + else: + reactor.resolve(host).addCallback(self._join_gotIP, port) + + return self.joining + + def _join_gotIP(self, ip, port): + """Join the DHT using a single bootstrap nodes IP address.""" + self.outstandingJoins += 1 + self.khashmir.addContact(ip, port, self._join_single, self._join_error) + + def _join_single(self, addr): + """Process the response from the bootstrap node. + + Finish the join by contacting close nodes. + """ + self.outstandingJoins -= 1 + if addr: + self.foundAddrs.append(addr) + if addr or self.outstandingJoins <= 0: + self.khashmir.findCloseNodes(self._join_complete, self._join_complete) + log.msg('Got back from bootstrap node: %r' % (addr,)) + + def _join_error(self, failure = None): + """Process an error in contacting the bootstrap node. + + If no bootstrap nodes remain, finish the process by contacting + close nodes. + """ + self.outstandingJoins -= 1 + log.msg("bootstrap node could not be reached") + if self.outstandingJoins <= 0: + self.khashmir.findCloseNodes(self._join_complete, self._join_complete) + + def _join_complete(self, result): + """End the joining process and return the addresses found for this node.""" + if not self.joined and len(result) > 0: + self.joined = True + if self.joining and self.outstandingJoins <= 0: + df = self.joining + self.joining = None + if self.joined or self.bootstrap_node: + self.joined = True + df.callback(self.foundAddrs) + else: + df.errback(DHTError('could not find any nodes to bootstrap to')) + + def getAddrs(self): + """Get the list of addresses returned by bootstrap nodes for this node.""" + return self.foundAddrs + + def leave(self): + """See L{apt_p2p.interfaces.IDHT}.""" + if self.config is None: + raise DHTError, "configuration not loaded" + + if self.joined or self.joining: + if self.joining: + self.joining.errback(DHTError('still joining when leave was called')) + self.joining = None + self.joined = False + self.khashmir.shutdown() + + def _normKey(self, key, bits=None, bytes=None): + """Normalize the length of keys used in the DHT.""" + bits = self.config["HASH_LENGTH"] + if bits is not None: + bytes = (bits - 1) // 8 + 1 + else: + if bytes is None: + raise DHTError, "you must specify one of bits or bytes for normalization" + + # Extend short keys with null bytes + if len(key) < bytes: + key = key + '\000'*(bytes - len(key)) + # Truncate long keys + elif len(key) > bytes: + key = key[:bytes] + return key + + def getValue(self, key): + """See L{apt_p2p.interfaces.IDHT}.""" + if self.config is None: + raise DHTError, "configuration not loaded" + if not self.joined: + raise DHTError, "have not joined a network yet" + + key = self._normKey(key) + + d = defer.Deferred() + if key not in self.retrieving: + self.khashmir.valueForKey(key, self._getValue) + self.retrieving.setdefault(key, []).append(d) + return d + + def _getValue(self, key, result): + """Process a returned list of values from the DHT.""" + # Save the list of values to return when it is complete + if result: + self.retrieved.setdefault(key, []).extend([bdecode(r) for r in result]) + else: + # Empty list, the get is complete, return the result + final_result = [] + if key in self.retrieved: + final_result = self.retrieved[key] + del self.retrieved[key] + for i in range(len(self.retrieving[key])): + d = self.retrieving[key].pop(0) + d.callback(final_result) + del self.retrieving[key] + + def 
storeValue(self, key, value): + """See L{apt_p2p.interfaces.IDHT}.""" + if self.config is None: + raise DHTError, "configuration not loaded" + if not self.joined: + raise DHTError, "have not joined a network yet" + + key = self._normKey(key) + bvalue = bencode(value) + + if key in self.storing and bvalue in self.storing[key]: + raise DHTError, "already storing that key with the same value" + + d = defer.Deferred() + self.khashmir.storeValueForKey(key, bvalue, self._storeValue) + self.storing.setdefault(key, {})[bvalue] = d + return d + + def _storeValue(self, key, bvalue, result): + """Process the response from the DHT.""" + if key in self.storing and bvalue in self.storing[key]: + # Check if the store succeeded + if len(result) > 0: + self.storing[key][bvalue].callback(result) + else: + self.storing[key][bvalue].errback(DHTError('could not store value %s in key %s' % (bvalue, key))) + del self.storing[key][bvalue] + if len(self.storing[key].keys()) == 0: + del self.storing[key] + +class TestSimpleDHT(unittest.TestCase): + """Simple 2-node unit tests for the DHT.""" + + timeout = 2 + DHT_DEFAULTS = {'PORT': 9977, 'K': 8, 'HASH_LENGTH': 160, + 'CHECKPOINT_INTERVAL': 300, 'CONCURRENT_REQS': 4, + 'STORE_REDUNDANCY': 3, 'RETRIEVE_VALUES': -10000, + 'MAX_FAILURES': 3, + 'MIN_PING_INTERVAL': 900,'BUCKET_STALENESS': 3600, + 'KEY_EXPIRE': 3600, 'SPEW': False, } + + def setUp(self): + self.a = DHT() + self.b = DHT() + self.a.config = self.DHT_DEFAULTS.copy() + self.a.config['PORT'] = 4044 + self.a.bootstrap = ["127.0.0.1:4044"] + self.a.bootstrap_node = True + self.a.cache_dir = '/tmp' + self.b.config = self.DHT_DEFAULTS.copy() + self.b.config['PORT'] = 4045 + self.b.bootstrap = ["127.0.0.1:4044"] + self.b.cache_dir = '/tmp' + + def test_bootstrap_join(self): + d = self.a.join() + return d + + def node_join(self, result): + d = self.b.join() + return d + + def test_join(self): + self.lastDefer = defer.Deferred() + d = self.a.join() + d.addCallback(self.node_join) + d.addCallback(self.lastDefer.callback) + return self.lastDefer + + def test_normKey(self): + h = self.a._normKey('12345678901234567890') + self.failUnless(h == '12345678901234567890') + h = self.a._normKey('12345678901234567') + self.failUnless(h == '12345678901234567\000\000\000') + h = self.a._normKey('1234567890123456789012345') + self.failUnless(h == '12345678901234567890') + h = self.a._normKey('1234567890123456789') + self.failUnless(h == '1234567890123456789\000') + h = self.a._normKey('123456789012345678901') + self.failUnless(h == '12345678901234567890') + + def value_stored(self, result, value): + self.stored -= 1 + if self.stored == 0: + self.get_values() + + def store_values(self, result): + self.stored = 3 + d = self.a.storeValue(sha.new('4045').digest(), str(4045*3)) + d.addCallback(self.value_stored, 4045) + d = self.a.storeValue(sha.new('4044').digest(), str(4044*2)) + d.addCallback(self.value_stored, 4044) + d = self.b.storeValue(sha.new('4045').digest(), str(4045*2)) + d.addCallback(self.value_stored, 4045) + + def check_values(self, result, values): + self.checked -= 1 + self.failUnless(len(result) == len(values)) + for v in result: + self.failUnless(v in values) + if self.checked == 0: + self.lastDefer.callback(1) + + def get_values(self): + self.checked = 4 + d = self.a.getValue(sha.new('4044').digest()) + d.addCallback(self.check_values, [str(4044*2)]) + d = self.b.getValue(sha.new('4044').digest()) + d.addCallback(self.check_values, [str(4044*2)]) + d = self.a.getValue(sha.new('4045').digest()) + 
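+        # Both nodes stored a value under the '4045' key, so both values
+        # should be returned by the DHT.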
d.addCallback(self.check_values, [str(4045*2), str(4045*3)]) + d = self.b.getValue(sha.new('4045').digest()) + d.addCallback(self.check_values, [str(4045*2), str(4045*3)]) + + def test_store(self): + from twisted.internet.base import DelayedCall + DelayedCall.debug = True + self.lastDefer = defer.Deferred() + d = self.a.join() + d.addCallback(self.node_join) + d.addCallback(self.store_values) + return self.lastDefer + + def tearDown(self): + self.a.leave() + try: + os.unlink(self.a.khashmir.store.db) + except: + pass + self.b.leave() + try: + os.unlink(self.b.khashmir.store.db) + except: + pass + +class TestMultiDHT(unittest.TestCase): + """More complicated 20-node tests for the DHT.""" + + timeout = 60 + num = 20 + DHT_DEFAULTS = {'PORT': 9977, 'K': 8, 'HASH_LENGTH': 160, + 'CHECKPOINT_INTERVAL': 300, 'CONCURRENT_REQS': 4, + 'STORE_REDUNDANCY': 3, 'RETRIEVE_VALUES': -10000, + 'MAX_FAILURES': 3, + 'MIN_PING_INTERVAL': 900,'BUCKET_STALENESS': 3600, + 'KEY_EXPIRE': 3600, 'SPEW': False, } + + def setUp(self): + self.l = [] + self.startport = 4081 + for i in range(self.num): + self.l.append(DHT()) + self.l[i].config = self.DHT_DEFAULTS.copy() + self.l[i].config['PORT'] = self.startport + i + self.l[i].bootstrap = ["127.0.0.1:" + str(self.startport)] + self.l[i].cache_dir = '/tmp' + self.l[0].bootstrap_node = True + + def node_join(self, result, next_node): + d = self.l[next_node].join() + if next_node + 1 < len(self.l): + d.addCallback(self.node_join, next_node + 1) + else: + d.addCallback(self.lastDefer.callback) + + def test_join(self): + self.timeout = 2 + self.lastDefer = defer.Deferred() + d = self.l[0].join() + d.addCallback(self.node_join, 1) + return self.lastDefer + + def store_values(self, result, i = 0, j = 0): + if j > i: + j -= i+1 + i += 1 + if i == len(self.l): + self.get_values() + else: + d = self.l[j].storeValue(sha.new(str(self.startport+i)).digest(), str((self.startport+i)*(j+1))) + d.addCallback(self.store_values, i, j+1) + + def get_values(self, result = None, check = None, i = 0, j = 0): + if result is not None: + self.failUnless(len(result) == len(check)) + for v in result: + self.failUnless(v in check) + if j >= len(self.l): + j -= len(self.l) + i += 1 + if i == len(self.l): + self.lastDefer.callback(1) + else: + d = self.l[i].getValue(sha.new(str(self.startport+j)).digest()) + check = [] + for k in range(self.startport+j, (self.startport+j)*(j+1)+1, self.startport+j): + check.append(str(k)) + d.addCallback(self.get_values, check, i, j + random.randrange(1, min(len(self.l), 10))) + + def store_join(self, result, next_node): + d = self.l[next_node].join() + if next_node + 1 < len(self.l): + d.addCallback(self.store_join, next_node + 1) + else: + d.addCallback(self.store_values) + + def test_store(self): + from twisted.internet.base import DelayedCall + DelayedCall.debug = True + self.lastDefer = defer.Deferred() + d = self.l[0].join() + d.addCallback(self.store_join, 1) + return self.lastDefer + + def tearDown(self): + for i in self.l: + try: + i.leave() + os.unlink(i.khashmir.store.db) + except: + pass diff --git a/apt_p2p_Khashmir/__init__.py b/apt_p2p_Khashmir/__init__.py new file mode 100644 index 0000000..594e80a --- /dev/null +++ b/apt_p2p_Khashmir/__init__.py @@ -0,0 +1,22 @@ + +"""The apt-p2p implementation of the Khashmir DHT. 
+
+These modules implement a modified Khashmir, which is a Kademlia-like
+Distributed Hash Table available at::
+
+  http://khashmir.sourceforge.net/
+
+The protocol for the implementation's communication is described here::
+
+  http://www.camrdale.org/apt-p2p/protocol.html
+
+To run the DHT you probably want to do something like::
+
+  from apt_p2p_Khashmir import DHT
+  myDHT = DHT.DHT()
+  myDHT.loadConfig(config, section)
+  myDHT.join()
+
+at which point you should be up and running and connected to others in the DHT.
+
+"""
diff --git a/apt_p2p_Khashmir/actions.py b/apt_p2p_Khashmir/actions.py
new file mode 100644
index 0000000..1179713
--- /dev/null
+++ b/apt_p2p_Khashmir/actions.py
@@ -0,0 +1,347 @@
+## Copyright 2002-2004 Andrew Loewenstern, All Rights Reserved
+# see LICENSE.txt for license information
+
+"""Details of how to perform actions on remote peers."""
+
+from twisted.internet import reactor
+from twisted.python import log
+
+from khash import intify
+from util import uncompact
+
+class ActionBase:
+    """Base class for some long-running asynchronous processes like finding nodes or values.
+
+    @type caller: L{khashmir.Khashmir}
+    @ivar caller: the DHT instance that is performing the action
+    @type target: C{string}
+    @ivar target: the target of the action, usually a DHT key
+    @type config: C{dictionary}
+    @ivar config: the configuration variables for the DHT
+    @type action: C{string}
+    @ivar action: the name of the action to call on remote nodes
+    @type num: C{long}
+    @ivar num: the target key in integer form
+    @type queried: C{dictionary}
+    @ivar queried: the nodes that have been queried for this action,
+        keys are node IDs, values are the node itself
+    @type answered: C{dictionary}
+    @ivar answered: the nodes that have answered the queries
+    @type found: C{dictionary}
+    @ivar found: nodes that have been found so far by the action
+    @type sorted_nodes: C{list} of L{node.Node}
+    @ivar sorted_nodes: a list of nodes sorted by their proximity to the key
+    @type results: C{dictionary}
+    @ivar results: keys are the results found so far by the action
+    @type desired_results: C{int}
+    @ivar desired_results: the minimum number of results that are needed
+        before the action should stop
+    @type callback: C{method}
+    @ivar callback: the method to call with the results
+    @type outstanding: C{int}
+    @ivar outstanding: the number of requests currently outstanding
+    @type outstanding_results: C{int}
+    @ivar outstanding_results: the number of results that are expected from
+        the requests that are currently outstanding
+    @type finished: C{boolean}
+    @ivar finished: whether the action is done
+    @type sort: C{method}
+    @ivar sort: used to sort nodes by their proximity to the target
+    """
+
+    def __init__(self, caller, target, callback, config, action, num_results = None):
+        """Initialize the action.
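+
+        Note that initializing the action does not start it; the search
+        only begins when L{goWithNodes} is called with the nodes to contact.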
+ + @type caller: L{khashmir.Khashmir} + @param caller: the DHT instance that is performing the action + @type target: C{string} + @param target: the target of the action, usually a DHT key + @type callback: C{method} + @param callback: the method to call with the results + @type config: C{dictionary} + @param config: the configuration variables for the DHT + @type action: C{string} + @param action: the name of the action to call on remote nodes + @type num_results: C{int} + @param num_results: the minimum number of results that are needed before + the action should stop (optional, defaults to getting all the results) + + """ + + self.caller = caller + self.target = target + self.config = config + self.action = action + self.num = intify(target) + self.queried = {} + self.answered = {} + self.found = {} + self.sorted_nodes = [] + self.results = {} + self.desired_results = num_results + self.callback = callback + self.outstanding = 0 + self.outstanding_results = 0 + self.finished = False + + def sort(a, b, num=self.num): + """Sort nodes relative to the ID we are looking for.""" + x, y = num ^ a.num, num ^ b.num + if x > y: + return 1 + elif x < y: + return -1 + return 0 + self.sort = sort + + #{ Main operation + def goWithNodes(self, nodes): + """Start the action's process with a list of nodes to contact.""" + for node in nodes: + if node.id == self.caller.node.id: + continue + else: + self.found[node.id] = node + self.sortNodes() + self.schedule() + + def schedule(self): + """Schedule requests to be sent to remote nodes.""" + # Check if we are already done + if self.desired_results and ((len(self.results) >= abs(self.desired_results)) or + (self.desired_results < 0 and + len(self.answered) >= self.config['STORE_REDUNDANCY'])): + self.finished = True + result = self.generateResult() + reactor.callLater(0, self.callback, *result) + + if self.finished or (self.desired_results and + len(self.results) + self.outstanding_results >= abs(self.desired_results)): + return + + # Loop for each node that should be processed + for node in self.getNodesToProcess(): + # Don't send requests twice or to ourself + if node.id not in self.queried and node.id != self.caller.node.id: + self.queried[node.id] = 1 + + # Get the action to call on the node + try: + f = getattr(node, self.action) + except AttributeError: + log.msg("%s doesn't have a %s method!" 
% (node, self.action)) + else: + # Get the arguments to the action's method + try: + args, expected_results = self.generateArgs(node) + except ValueError: + pass + else: + # Call the action on the remote node + self.outstanding += 1 + self.outstanding_results += expected_results + df = f(self.caller.node.id, *args) + df.addCallbacks(self.gotResponse, self.actionFailed, + callbackArgs = (node, expected_results), + errbackArgs = (node, expected_results)) + + # We might have to stop for now + if (self.outstanding >= self.config['CONCURRENT_REQS'] or + (self.desired_results and + len(self.results) + self.outstanding_results >= abs(self.desired_results))): + break + + assert self.outstanding >= 0 + assert self.outstanding_results >= 0 + + # If no requests are outstanding, then we are done + if self.outstanding == 0: + self.finished = True + result = self.generateResult() + reactor.callLater(0, self.callback, *result) + + def gotResponse(self, dict, node, expected_results): + """Receive a response from a remote node.""" + self.caller.insertNode(node) + if self.finished or self.answered.has_key(node.id): + # a day late and a dollar short + return + self.outstanding -= 1 + self.outstanding_results -= expected_results + self.answered[node.id] = 1 + self.processResponse(dict['rsp']) + self.schedule() + + def actionFailed(self, err, node, expected_results): + """Receive an error from a remote node.""" + log.msg("action %s failed (%s) %s/%s" % (self.action, self.config['PORT'], node.host, node.port)) + log.err(err) + self.caller.table.nodeFailed(node) + self.outstanding -= 1 + self.outstanding_results -= expected_results + self.schedule() + + def handleGotNodes(self, nodes): + """Process any received node contact info in the response. + + Not called by default, but suitable for being called by + L{processResponse} in a recursive node search. + """ + for compact_node in nodes: + node_contact = uncompact(compact_node) + node = self.caller.Node(node_contact) + if not self.found.has_key(node.id): + self.found[node.id] = node + + def sortNodes(self): + """Sort the nodes, if necessary. + + Assumes nodes are never removed from the L{found} dictionary. + """ + if len(self.sorted_nodes) != len(self.found): + self.sorted_nodes = self.found.values() + self.sorted_nodes.sort(self.sort) + + #{ Subclass for specific actions + def getNodesToProcess(self): + """Generate a list of nodes to process next. + + This implementation is suitable for a recurring search over all nodes. + """ + self.sortNodes() + return self.sorted_nodes[:self.config['K']] + + def generateArgs(self, node): + """Generate the arguments to the node's action. + + These arguments will be appended to our node ID when calling the action. + Also return the number of results expected from this request. 
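+
+        The base implementation sends only the target key and expects no
+        results back, which is suitable for a pure node search.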
+ + @raise ValueError: if the node should not be queried + """ + return (self.target, ), 0 + + def processResponse(self, dict): + """Process the response dictionary received from the remote node.""" + self.handleGotNodes(dict['nodes']) + + def generateResult(self, nodes): + """Create the final result to return to the L{callback} function.""" + return [] + + +class FindNode(ActionBase): + """Find the closest nodes to the key.""" + + def __init__(self, caller, target, callback, config, action="findNode"): + ActionBase.__init__(self, caller, target, callback, config, action) + + def processResponse(self, dict): + """Save the token received from each node.""" + if dict["id"] in self.found: + self.found[dict["id"]].updateToken(dict.get('token', '')) + self.handleGotNodes(dict['nodes']) + + def generateResult(self): + """Result is the K closest nodes to the target.""" + self.sortNodes() + return (self.sorted_nodes[:self.config['K']], ) + + +class FindValue(ActionBase): + """Find the closest nodes to the key and check for values.""" + + def __init__(self, caller, target, callback, config, action="findValue"): + ActionBase.__init__(self, caller, target, callback, config, action) + + def processResponse(self, dict): + """Save the number of values each node has.""" + if dict["id"] in self.found: + self.found[dict["id"]].updateNumValues(dict.get('num', 0)) + self.handleGotNodes(dict['nodes']) + + def generateResult(self): + """Result is the nodes that have values, sorted by proximity to the key.""" + self.sortNodes() + return ([node for node in self.sorted_nodes if node.num_values > 0], ) + + +class GetValue(ActionBase): + """Retrieve values from a list of nodes.""" + + def __init__(self, caller, target, local_results, num_results, callback, config, action="getValue"): + """Initialize the action with the locally available results. + + @type local_results: C{list} of C{string} + @param local_results: the values that were available in this node + """ + ActionBase.__init__(self, caller, target, callback, config, action, num_results) + if local_results: + for result in local_results: + self.results[result] = 1 + + def getNodesToProcess(self): + """Nodes are never added, always return the same sorted node list.""" + return self.sorted_nodes + + def generateArgs(self, node): + """Arguments include the number of values to request.""" + if node.num_values > 0: + # Request all desired results from each node, just to be sure. + num_values = abs(self.desired_results) - len(self.results) + assert num_values > 0 + if num_values > node.num_values: + num_values = 0 + return (self.target, num_values), node.num_values + else: + raise ValueError, "Don't try and get values from this node because it doesn't have any" + + def processResponse(self, dict): + """Save the returned values, calling the L{callback} each time there are new ones.""" + if dict.has_key('values'): + def x(y, z=self.results): + if not z.has_key(y): + z[y] = 1 + return y + else: + return None + z = len(dict['values']) + v = filter(None, map(x, dict['values'])) + if len(v): + reactor.callLater(0, self.callback, self.target, v) + + def generateResult(self): + """Results have all been returned, now send the empty list to end the action.""" + return (self.target, []) + + +class StoreValue(ActionBase): + """Store a value in a list of nodes.""" + + def __init__(self, caller, target, value, num_results, callback, config, action="storeValue"): + """Initialize the action with the value to store. 
+ + @type value: C{string} + @param value: the value to store in the nodes + """ + ActionBase.__init__(self, caller, target, callback, config, action, num_results) + self.value = value + + def getNodesToProcess(self): + """Nodes are never added, always return the same sorted list.""" + return self.sorted_nodes + + def generateArgs(self, node): + """Args include the value to store and the node's token.""" + if node.token: + return (self.target, self.value, node.token), 1 + else: + raise ValueError, "Don't store at this node since we don't know it's token" + + def processResponse(self, dict): + """Save the response, though it should be nothin but the ID.""" + self.results[dict["id"]] = dict + + def generateResult(self): + """Return all the response IDs received.""" + return (self.target, self.value, self.results.values()) diff --git a/apt_p2p_Khashmir/bencode.py b/apt_p2p_Khashmir/bencode.py new file mode 100644 index 0000000..06a64e7 --- /dev/null +++ b/apt_p2p_Khashmir/bencode.py @@ -0,0 +1,480 @@ + +"""Functions for bencoding and bdecoding data. + +@type decode_func: C{dictionary} of C{function} +@var decode_func: a dictionary of function calls to be made, based on data, + the keys are the first character of the data and the value is the + function to use to decode that data +@type bencached_marker: C{list} +@var bencached_marker: mutable type to ensure class origination +@type encode_func: C{dictionary} of C{function} +@var encode_func: a dictionary of function calls to be made, based on data, + the keys are the type of the data and the value is the + function to use to encode that data +@type BencachedType: C{type} +@var BencachedType: the L{Bencached} type +""" + +from types import IntType, LongType, StringType, ListType, TupleType, DictType, BooleanType +try: + from types import UnicodeType +except ImportError: + UnicodeType = None +from datetime import datetime +import time + +from twisted.python import log +from twisted.trial import unittest + +class BencodeError(ValueError): + pass + +def decode_int(x, f): + """Bdecode an integer. + + @type x: C{string} + @param x: the data to decode + @type f: C{int} + @param f: the offset in the data to start at + @rtype: C{int}, C{int} + @return: the bdecoded integer, and the offset to read next + @raise BencodeError: if the data is improperly encoded + + """ + + f += 1 + newf = x.index('e', f) + try: + n = int(x[f:newf]) + except: + n = long(x[f:newf]) + if x[f] == '-': + if x[f + 1] == '0': + raise BencodeError, "integer has a leading zero after a negative sign" + elif x[f] == '0' and newf != f+1: + raise BencodeError, "integer has a leading zero" + return (n, newf+1) + +def decode_string(x, f): + """Bdecode a string. + + @type x: C{string} + @param x: the data to decode + @type f: C{int} + @param f: the offset in the data to start at + @rtype: C{string}, C{int} + @return: the bdecoded string, and the offset to read next + @raise BencodeError: if the data is improperly encoded + + """ + + colon = x.index(':', f) + try: + n = int(x[f:colon]) + except (OverflowError, ValueError): + n = long(x[f:colon]) + if x[f] == '0' and colon != f+1: + raise BencodeError, "string length has a leading zero" + colon += 1 + return (x[colon:colon+n], colon+n) + +def decode_unicode(x, f): + """Bdecode a unicode string. 
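+
+    For example, bdecode('u3:abc') yields u'abc' (see the unit tests below).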
+ + @type x: C{string} + @param x: the data to decode + @type f: C{int} + @param f: the offset in the data to start at + @rtype: C{int}, C{int} + @return: the bdecoded unicode string, and the offset to read next + + """ + + s, f = decode_string(x, f+1) + return (s.decode('UTF-8'),f) + +def decode_datetime(x, f): + """Bdecode a datetime value. + + @type x: C{string} + @param x: the data to decode + @type f: C{int} + @param f: the offset in the data to start at + @rtype: C{datetime.datetime}, C{int} + @return: the bdecoded integer, and the offset to read next + @raise BencodeError: if the data is improperly encoded + + """ + + f += 1 + newf = x.index('e', f) + try: + date = datetime(*(time.strptime(x[f:newf], '%Y-%m-%dT%H:%M:%S')[0:6])) + except: + raise BencodeError, "datetime value could not be decoded: %s" % x[f:newf] + return (date, newf+1) + +def decode_list(x, f): + """Bdecode a list. + + @type x: C{string} + @param x: the data to decode + @type f: C{int} + @param f: the offset in the data to start at + @rtype: C{list}, C{int} + @return: the bdecoded list, and the offset to read next + + """ + + r, f = [], f+1 + while x[f] != 'e': + v, f = decode_func[x[f]](x, f) + r.append(v) + return (r, f + 1) + +def decode_dict(x, f): + """Bdecode a dictionary. + + @type x: C{string} + @param x: the data to decode + @type f: C{int} + @param f: the offset in the data to start at + @rtype: C{dictionary}, C{int} + @return: the bdecoded dictionary, and the offset to read next + @raise BencodeError: if the data is improperly encoded + + """ + + r, f = {}, f+1 + lastkey = None + while x[f] != 'e': + k, f = decode_string(x, f) + if lastkey >= k: + raise BencodeError, "dictionary keys must be in sorted order" + lastkey = k + r[k], f = decode_func[x[f]](x, f) + return (r, f + 1) + +decode_func = {} +decode_func['l'] = decode_list +decode_func['d'] = decode_dict +decode_func['i'] = decode_int +decode_func['0'] = decode_string +decode_func['1'] = decode_string +decode_func['2'] = decode_string +decode_func['3'] = decode_string +decode_func['4'] = decode_string +decode_func['5'] = decode_string +decode_func['6'] = decode_string +decode_func['7'] = decode_string +decode_func['8'] = decode_string +decode_func['9'] = decode_string +decode_func['u'] = decode_unicode +decode_func['t'] = decode_datetime + +def bdecode(x, sloppy = False): + """Bdecode a string of data. + + @type x: C{string} + @param x: the data to decode + @type sloppy: C{boolean} + @param sloppy: whether to allow errors in the decoding + @rtype: unknown + @return: the bdecoded data + @raise BencodeError: if the data is improperly encoded + + """ + + try: + r, l = decode_func[x[0]](x, 0) +# except (IndexError, KeyError): + except (IndexError, KeyError, ValueError): + raise BencodeError, "bad bencoded data" + if not sloppy and l != len(x): + raise BencodeError, "bad bencoded data, all could not be decoded" + return r + +bencached_marker = [] + +class Bencached(object): + """Dummy data structure for storing bencoded data in memory. + + @type marker: C{list} + @ivar marker: mutable type to make sure the data was encoded by this class + @type bencoded: C{string} + @ivar bencoded: the bencoded data stored in a string + + """ + + def __init__(self, s): + """ + + @type s: C{string} + @param s: the new bencoded data to store + + """ + + self.marker = bencached_marker + self.bencoded = s + +BencachedType = type(Bencached('')) # insufficient, but good as a filter + +def encode_bencached(x,r): + """Bencode L{Bencached} data. 
+ + @type x: L{Bencached} + @param x: the data to encode + @type r: C{list} + @param r: the currently bencoded data, to which the bencoding of x + will be appended + + """ + + assert x.marker == bencached_marker + r.append(x.bencoded) + +def encode_int(x,r): + """Bencode an integer. + + @type x: C{int} + @param x: the data to encode + @type r: C{list} + @param r: the currently bencoded data, to which the bencoding of x + will be appended + + """ + + r.extend(('i',str(x),'e')) + +def encode_bool(x,r): + """Bencode a boolean. + + @type x: C{boolean} + @param x: the data to encode + @type r: C{list} + @param r: the currently bencoded data, to which the bencoding of x + will be appended + + """ + + encode_int(int(x),r) + +def encode_string(x,r): + """Bencode a string. + + @type x: C{string} + @param x: the data to encode + @type r: C{list} + @param r: the currently bencoded data, to which the bencoding of x + will be appended + + """ + + r.extend((str(len(x)),':',x)) + +def encode_unicode(x,r): + """Bencode a unicode string. + + @type x: C{unicode} + @param x: the data to encode + @type r: C{list} + @param r: the currently bencoded data, to which the bencoding of x + will be appended + + """ + + #r.append('u') + encode_string(x.encode('UTF-8'),r) + +def encode_datetime(x,r): + """Bencode a datetime value in UTC. + + If the datetime object has time zone info, it is converted to UTC time. + Otherwise it is assumed that the time is already in UTC time. + Microseconds are removed. + + @type x: C{datetime.datetime} + @param x: the data to encode + @type r: C{list} + @param r: the currently bencoded data, to which the bencoding of x + will be appended + + """ + + date = x.replace(microsecond = 0) + offset = date.utcoffset() + if offset is not None: + utcdate = date.replace(tzinfo = None) + offset + else: + utcdate = date + r.extend(('t',utcdate.isoformat(),'e')) + +def encode_list(x,r): + """Bencode a list. + + @type x: C{list} + @param x: the data to encode + @type r: C{list} + @param r: the currently bencoded data, to which the bencoding of x + will be appended + + """ + + r.append('l') + for e in x: + encode_func[type(e)](e, r) + r.append('e') + +def encode_dict(x,r): + """Bencode a dictionary. + + @type x: C{dictionary} + @param x: the data to encode + @type r: C{list} + @param r: the currently bencoded data, to which the bencoding of x + will be appended + + """ + + r.append('d') + ilist = x.items() + ilist.sort() + for k,v in ilist: + r.extend((str(len(k)),':',k)) + encode_func[type(v)](v, r) + r.append('e') + +encode_func = {} +encode_func[BencachedType] = encode_bencached +encode_func[IntType] = encode_int +encode_func[LongType] = encode_int +encode_func[StringType] = encode_string +encode_func[ListType] = encode_list +encode_func[TupleType] = encode_list +encode_func[DictType] = encode_dict +encode_func[BooleanType] = encode_bool +encode_func[datetime] = encode_datetime +if UnicodeType: + encode_func[UnicodeType] = encode_unicode + +def bencode(x): + """Bencode some data. 
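+
+    For example (mirroring the unit tests below), bencode(4) yields 'i4e',
+    bencode('abc') yields '3:abc', and bencode([1, 2, 3]) yields 'li1ei2ei3ee'.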
+ + @type x: unknown + @param x: the data to encode + @rtype: string + @return: the bencoded data + @raise BencodeError: if the data contains a type that cannot be encoded + + """ + r = [] + try: + encode_func[type(x)](x, r) + except: + raise BencodeError, "failed to bencode the data" + return ''.join(r) + +class TestBencode(unittest.TestCase): + """Test the bencoding and bdecoding of data.""" + + timeout = 2 + + def test_bdecode_string(self): + self.failUnlessRaises(BencodeError, bdecode, '0:0:') + self.failUnlessRaises(BencodeError, bdecode, '') + self.failUnlessRaises(BencodeError, bdecode, '35208734823ljdahflajhdf') + self.failUnlessRaises(BencodeError, bdecode, '2:abfdjslhfld') + self.failUnlessEqual(bdecode('0:'), '') + self.failUnlessEqual(bdecode('3:abc'), 'abc') + self.failUnlessEqual(bdecode('10:1234567890'), '1234567890') + self.failUnlessRaises(BencodeError, bdecode, '02:xy') + self.failUnlessRaises(BencodeError, bdecode, '9999:x') + + def test_bdecode_int(self): + self.failUnlessRaises(BencodeError, bdecode, 'ie') + self.failUnlessRaises(BencodeError, bdecode, 'i341foo382e') + self.failUnlessEqual(bdecode('i4e'), 4L) + self.failUnlessEqual(bdecode('i0e'), 0L) + self.failUnlessEqual(bdecode('i123456789e'), 123456789L) + self.failUnlessEqual(bdecode('i-10e'), -10L) + self.failUnlessRaises(BencodeError, bdecode, 'i-0e') + self.failUnlessRaises(BencodeError, bdecode, 'i123') + self.failUnlessRaises(BencodeError, bdecode, 'i6easd') + self.failUnlessRaises(BencodeError, bdecode, 'i03e') + + def test_bdecode_list(self): + self.failUnlessRaises(BencodeError, bdecode, 'l') + self.failUnlessEqual(bdecode('le'), []) + self.failUnlessRaises(BencodeError, bdecode, 'leanfdldjfh') + self.failUnlessEqual(bdecode('l0:0:0:e'), ['', '', '']) + self.failUnlessRaises(BencodeError, bdecode, 'relwjhrlewjh') + self.failUnlessEqual(bdecode('li1ei2ei3ee'), [1, 2, 3]) + self.failUnlessEqual(bdecode('l3:asd2:xye'), ['asd', 'xy']) + self.failUnlessEqual(bdecode('ll5:Alice3:Bobeli2ei3eee'), [['Alice', 'Bob'], [2, 3]]) + self.failUnlessRaises(BencodeError, bdecode, 'l01:ae') + self.failUnlessRaises(BencodeError, bdecode, 'l0:') + + def test_bdecode_dict(self): + self.failUnlessRaises(BencodeError, bdecode, 'd') + self.failUnlessRaises(BencodeError, bdecode, 'defoobar') + self.failUnlessEqual(bdecode('de'), {}) + self.failUnlessEqual(bdecode('d3:agei25e4:eyes4:bluee'), {'age': 25, 'eyes': 'blue'}) + self.failUnlessEqual(bdecode('d8:spam.mp3d6:author5:Alice6:lengthi100000eee'), + {'spam.mp3': {'author': 'Alice', 'length': 100000}}) + self.failUnlessRaises(BencodeError, bdecode, 'd3:fooe') + self.failUnlessRaises(BencodeError, bdecode, 'di1e0:e') + self.failUnlessRaises(BencodeError, bdecode, 'd1:b0:1:a0:e') + self.failUnlessRaises(BencodeError, bdecode, 'd1:a0:1:a0:e') + self.failUnlessRaises(BencodeError, bdecode, 'd0:0:') + self.failUnlessRaises(BencodeError, bdecode, 'd0:') + + def test_bdecode_unicode(self): + self.failUnlessRaises(BencodeError, bdecode, 'u0:0:') + self.failUnlessRaises(BencodeError, bdecode, 'u') + self.failUnlessRaises(BencodeError, bdecode, 'u35208734823ljdahflajhdf') + self.failUnlessRaises(BencodeError, bdecode, 'u2:abfdjslhfld') + self.failUnlessEqual(bdecode('u0:'), '') + self.failUnlessEqual(bdecode('u3:abc'), 'abc') + self.failUnlessEqual(bdecode('u10:1234567890'), '1234567890') + self.failUnlessRaises(BencodeError, bdecode, 'u02:xy') + self.failUnlessRaises(BencodeError, bdecode, 'u9999:x') + + def test_bencode_int(self): + self.failUnlessEqual(bencode(4), 'i4e') + 
self.failUnlessEqual(bencode(0), 'i0e') + self.failUnlessEqual(bencode(-10), 'i-10e') + self.failUnlessEqual(bencode(12345678901234567890L), 'i12345678901234567890e') + + def test_bencode_string(self): + self.failUnlessEqual(bencode(''), '0:') + self.failUnlessEqual(bencode('abc'), '3:abc') + self.failUnlessEqual(bencode('1234567890'), '10:1234567890') + + def test_bencode_list(self): + self.failUnlessEqual(bencode([]), 'le') + self.failUnlessEqual(bencode([1, 2, 3]), 'li1ei2ei3ee') + self.failUnlessEqual(bencode([['Alice', 'Bob'], [2, 3]]), 'll5:Alice3:Bobeli2ei3eee') + + def test_bencode_dict(self): + self.failUnlessEqual(bencode({}), 'de') + self.failUnlessEqual(bencode({'age': 25, 'eyes': 'blue'}), 'd3:agei25e4:eyes4:bluee') + self.failUnlessEqual(bencode({'spam.mp3': {'author': 'Alice', 'length': 100000}}), + 'd8:spam.mp3d6:author5:Alice6:lengthi100000eee') + self.failUnlessRaises(BencodeError, bencode, {1: 'foo'}) + + def test_bencode_unicode(self): + self.failUnlessEqual(bencode(u''), '0:') + self.failUnlessEqual(bencode(u'abc'), '3:abc') + self.failUnlessEqual(bencode(u'1234567890'), '10:1234567890') + + def test_bool(self): + self.failUnless(bdecode(bencode(True))) + self.failIf(bdecode(bencode(False))) + + def test_datetime(self): + date = datetime.utcnow() + self.failUnlessEqual(bdecode(bencode(date)), date.replace(microsecond = 0)) + + if UnicodeType == None: + test_bencode_unicode.skip = "Python was not compiled with unicode support" + test_bdecode_unicode.skip = "Python was not compiled with unicode support" diff --git a/apt_p2p_Khashmir/db.py b/apt_p2p_Khashmir/db.py new file mode 100644 index 0000000..47e974c --- /dev/null +++ b/apt_p2p_Khashmir/db.py @@ -0,0 +1,206 @@ + +"""An sqlite database for storing nodes and key/value pairs.""" + +from datetime import datetime, timedelta +from pysqlite2 import dbapi2 as sqlite +from binascii import a2b_base64, b2a_base64 +from time import sleep +import os + +from twisted.trial import unittest + +class DBExcept(Exception): + pass + +class khash(str): + """Dummy class to convert all hashes to base64 for storing in the DB.""" + +class dht_value(str): + """Dummy class to convert all DHT values to base64 for storing in the DB.""" + +# Initialize the database to work with 'khash' objects (binary strings) +sqlite.register_adapter(khash, b2a_base64) +sqlite.register_converter("KHASH", a2b_base64) +sqlite.register_converter("khash", a2b_base64) + +# Initialize the database to work with DHT values (binary strings) +sqlite.register_adapter(dht_value, b2a_base64) +sqlite.register_converter("DHT_VALUE", a2b_base64) +sqlite.register_converter("dht_value", a2b_base64) + +class DB: + """An sqlite database for storing persistent node info and key/value pairs. + + @type db: C{string} + @ivar db: the database file to use + @type conn: L{pysqlite2.dbapi2.Connection} + @ivar conn: an open connection to the sqlite database + """ + + def __init__(self, db): + """Load or create the database file. 
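+
+        A minimal usage sketch (the path is illustrative)::
+
+          store = DB('/tmp/khashmir.db')
+          store.storeValue(key, value)
+          values = store.retrieveValues(key)
+          store.close()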
+ + @type db: C{string} + @param db: the database file to use + """ + self.db = db + try: + os.stat(db) + except OSError: + self._createNewDB(db) + else: + self._loadDB(db) + if sqlite.version_info < (2, 1): + sqlite.register_converter("TEXT", str) + sqlite.register_converter("text", str) + else: + self.conn.text_factory = str + + #{ Loading the DB + def _loadDB(self, db): + """Open a new connection to the existing database file""" + try: + self.conn = sqlite.connect(database=db, detect_types=sqlite.PARSE_DECLTYPES) + except: + import traceback + raise DBExcept, "Couldn't open DB", traceback.format_exc() + + def _createNewDB(self, db): + """Open a connection to a new database and create the necessary tables.""" + self.conn = sqlite.connect(database=db, detect_types=sqlite.PARSE_DECLTYPES) + c = self.conn.cursor() + c.execute("CREATE TABLE kv (key KHASH, value DHT_VALUE, last_refresh TIMESTAMP, "+ + "PRIMARY KEY (key, value))") + c.execute("CREATE INDEX kv_key ON kv(key)") + c.execute("CREATE INDEX kv_last_refresh ON kv(last_refresh)") + c.execute("CREATE TABLE nodes (id KHASH PRIMARY KEY, host TEXT, port NUMBER)") + c.execute("CREATE TABLE self (num NUMBER PRIMARY KEY, id KHASH)") + self.conn.commit() + + def close(self): + self.conn.close() + + #{ This node's ID + def getSelfNode(self): + """Retrieve this node's ID from a previous run of the program.""" + c = self.conn.cursor() + c.execute('SELECT id FROM self WHERE num = 0') + id = c.fetchone() + if id: + return id[0] + else: + return None + + def saveSelfNode(self, id): + """Store this node's ID for a subsequent run of the program.""" + c = self.conn.cursor() + c.execute("INSERT OR REPLACE INTO self VALUES (0, ?)", (khash(id),)) + self.conn.commit() + + #{ Routing table + def dumpRoutingTable(self, buckets): + """Save routing table nodes to the database.""" + c = self.conn.cursor() + c.execute("DELETE FROM nodes WHERE id NOT NULL") + for bucket in buckets: + for node in bucket.l: + c.execute("INSERT INTO nodes VALUES (?, ?, ?)", (khash(node.id), node.host, node.port)) + self.conn.commit() + + def getRoutingTable(self): + """Load routing table nodes from database.""" + c = self.conn.cursor() + c.execute("SELECT * FROM nodes") + return c.fetchall() + + #{ Key/value pairs + def retrieveValues(self, key): + """Retrieve values from the database.""" + c = self.conn.cursor() + c.execute("SELECT value FROM kv WHERE key = ?", (khash(key),)) + l = [] + rows = c.fetchall() + for row in rows: + l.append(row[0]) + return l + + def countValues(self, key): + """Count the number of values in the database.""" + c = self.conn.cursor() + c.execute("SELECT COUNT(value) as num_values FROM kv WHERE key = ?", (khash(key),)) + res = 0 + row = c.fetchone() + if row: + res = row[0] + return res + + def storeValue(self, key, value): + """Store or update a key and value.""" + c = self.conn.cursor() + c.execute("INSERT OR REPLACE INTO kv VALUES (?, ?, ?)", + (khash(key), dht_value(value), datetime.now())) + self.conn.commit() + + def expireValues(self, expireAfter): + """Expire older values after expireAfter seconds.""" + t = datetime.now() - timedelta(seconds=expireAfter) + c = self.conn.cursor() + c.execute("DELETE FROM kv WHERE last_refresh < ?", (t, )) + self.conn.commit() + +class TestDB(unittest.TestCase): + """Tests for the khashmir database.""" + + timeout = 5 + db = '/tmp/khashmir.db' + key = '\xca\xec\xb8\x0c\x00\xe7\x07\xf8~])\x8f\x9d\xe5_B\xff\x1a\xc4!' 
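+    # The key is a 20-byte binary digest (SHA-1 sized); the khash and
+    # dht_value adapters above round-trip such binary strings through base64.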
+
+    def setUp(self):
+        self.store = DB(self.db)
+
+    def test_selfNode(self):
+        self.store.saveSelfNode(self.key)
+        self.failUnlessEqual(self.store.getSelfNode(), self.key)
+
+    def test_Value(self):
+        self.store.storeValue(self.key, self.key)
+        val = self.store.retrieveValues(self.key)
+        self.failUnlessEqual(len(val), 1)
+        self.failUnlessEqual(val[0], self.key)
+
+    def test_expireValues(self):
+        self.store.storeValue(self.key, self.key)
+        sleep(2)
+        self.store.storeValue(self.key, self.key+self.key)
+        self.store.expireValues(1)
+        val = self.store.retrieveValues(self.key)
+        self.failUnlessEqual(len(val), 1)
+        self.failUnlessEqual(val[0], self.key+self.key)
+
+    def test_RoutingTable(self):
+        class dummy:
+            id = self.key
+            host = "127.0.0.1"
+            port = 9977
+            def contents(self):
+                return (self.id, self.host, self.port)
+        dummy2 = dummy()
+        dummy2.id = '\xaa\xbb\xcc\x0c\x00\xe7\x07\xf8~])\x8f\x9d\xe5_B\xff\x1a\xc4!'
+        dummy2.host = '205.23.67.124'
+        dummy2.port = 12345
+        class bl:
+            def __init__(self):
+                self.l = []
+        bl1 = bl()
+        bl1.l.append(dummy())
+        bl2 = bl()
+        bl2.l.append(dummy2)
+        buckets = [bl1, bl2]
+        self.store.dumpRoutingTable(buckets)
+        rt = self.store.getRoutingTable()
+        self.failUnlessIn(dummy().contents(), rt)
+        self.failUnlessIn(dummy2.contents(), rt)
+
+    def tearDown(self):
+        self.store.close()
+        os.unlink(self.db)
diff --git a/apt_p2p_Khashmir/khash.py b/apt_p2p_Khashmir/khash.py
new file mode 100644
index 0000000..91db232
--- /dev/null
+++ b/apt_p2p_Khashmir/khash.py
@@ -0,0 +1,103 @@
+## Copyright 2002-2003 Andrew Loewenstern, All Rights Reserved
+# see LICENSE.txt for license information
+
+"""Functions to deal with hashes (node IDs and keys)."""
+
+from sha import sha
+from os import urandom
+
+from twisted.trial import unittest
+
+def intify(hstr):
+    """Convert a hash (big-endian) to a long python integer."""
+    assert len(hstr) == 20
+    return long(hstr.encode('hex'), 16)
+
+def stringify(num):
+    """Convert a long python integer to a hash."""
+    str = hex(num)[2:]
+    if str[-1] == 'L':
+        str = str[:-1]
+    if len(str) % 2 != 0:
+        str = '0' + str
+    str = str.decode('hex')
+    return (20 - len(str)) *'\x00' + str
+
+def distance(a, b):
+    """Calculate the distance between two hashes expressed as strings."""
+    return intify(a) ^ intify(b)
+
+def newID():
+    """Get a new pseudorandom globally unique hash string."""
+    h = sha()
+    h.update(urandom(20))
+    return h.digest()
+
+def newIDInRange(min, max):
+    """Get a new pseudorandom globally unique hash string in the range."""
+    return stringify(randRange(min,max))
+
+def randRange(min, max):
+    """Get a new pseudorandom globally unique hash number in the range."""
+    return min + intify(newID()) % (max - min)
+
+def newTID():
+    """Get a new pseudorandom transaction ID number."""
+    return randRange(-2**30, 2**30)
+
+class TestNewID(unittest.TestCase):
+    """Test the newID function."""
+    def testLength(self):
+        self.failUnlessEqual(len(newID()), 20)
+    def testHundreds(self):
+        for x in xrange(100):
+            self.testLength()
+
+class TestIntify(unittest.TestCase):
+    """Test the intify function."""
+    known = [('\0' * 20, 0),
+             ('\xff' * 20, 2L**160 - 1),
+            ]
+    def testKnown(self):
+        for str, value in self.known:
+            self.failUnlessEqual(intify(str), value)
+    def testEndianessOnce(self):
+        h = newID()
+        while h[-1] == '\xff':
+            h = newID()
+        k = h[:-1] + chr(ord(h[-1]) + 1)
+        self.failUnlessEqual(intify(k) - intify(h), 1)
+    def testEndianessLots(self):
+        for x in xrange(100):
+            self.testEndianessOnce()
+
+class TestDistance(unittest.TestCase):
+    """Test 
the distance function.""" + known = [ + (("\0" * 20, "\xff" * 20), 2**160L -1), + ((sha("foo").digest(), sha("foo").digest()), 0), + ((sha("bar").digest(), sha("bar").digest()), 0) + ] + def testKnown(self): + for pair, dist in self.known: + self.failUnlessEqual(distance(pair[0], pair[1]), dist) + def testCommutitive(self): + for i in xrange(100): + x, y, z = newID(), newID(), newID() + self.failUnlessEqual(distance(x,y) ^ distance(y, z), distance(x, z)) + +class TestRandRange(unittest.TestCase): + """Test the randRange function.""" + def testOnce(self): + a = intify(newID()) + b = intify(newID()) + if a < b: + c = randRange(a, b) + self.failUnlessEqual(a <= c < b, True, "output out of range %d %d %d" % (b, c, a)) + else: + c = randRange(b, a) + self.failUnlessEqual(b <= c < a, True, "output out of range %d %d %d" % (b, c, a)) + + def testOneHundredTimes(self): + for i in xrange(100): + self.testOnce() diff --git a/apt_p2p_Khashmir/khashmir.py b/apt_p2p_Khashmir/khashmir.py new file mode 100644 index 0000000..126a30e --- /dev/null +++ b/apt_p2p_Khashmir/khashmir.py @@ -0,0 +1,666 @@ +## Copyright 2002-2004 Andrew Loewenstern, All Rights Reserved +# see LICENSE.txt for license information + +"""The main Khashmir program.""" + +import warnings +warnings.simplefilter("ignore", DeprecationWarning) + +from datetime import datetime, timedelta +from random import randrange, shuffle +from sha import sha +import os + +from twisted.internet.defer import Deferred +from twisted.internet import protocol, reactor +from twisted.trial import unittest + +from db import DB +from ktable import KTable +from knode import KNodeBase, KNodeRead, KNodeWrite, NULL_ID +from khash import newID, newIDInRange +from actions import FindNode, FindValue, GetValue, StoreValue +import krpc + +class KhashmirBase(protocol.Factory): + """The base Khashmir class, with base functionality and find node, no key-value mappings. + + @type _Node: L{node.Node} + @ivar _Node: the knode implementation to use for this class of DHT + @type config: C{dictionary} + @ivar config: the configuration parameters for the DHT + @type port: C{int} + @ivar port: the port to listen on + @type store: L{db.DB} + @ivar store: the database to store nodes and key/value pairs in + @type node: L{node.Node} + @ivar node: this node + @type table: L{ktable.KTable} + @ivar table: the routing table + @type token_secrets: C{list} of C{string} + @ivar token_secrets: the current secrets to use to create tokens + @type udp: L{krpc.hostbroker} + @ivar udp: the factory for the KRPC protocol + @type listenport: L{twisted.internet.interfaces.IListeningPort} + @ivar listenport: the UDP listening port + @type next_checkpoint: L{twisted.internet.interfaces.IDelayedCall} + @ivar next_checkpoint: the delayed call for the next checkpoint + """ + + _Node = KNodeBase + + def __init__(self, config, cache_dir='/tmp'): + """Initialize the Khashmir class and call the L{setup} method. + + @type config: C{dictionary} + @param config: the configuration parameters for the DHT + @type cache_dir: C{string} + @param cache_dir: the directory to store all files in + (optional, defaults to the /tmp directory) + """ + self.config = None + self.setup(config, cache_dir) + + def setup(self, config, cache_dir): + """Setup all the Khashmir sub-modules. 
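+
+        As a rough sketch of what is expected, the config dictionary must
+        supply at least the keys used here and in L{checkpoint}, such as
+        PORT, KEY_EXPIRE and CHECKPOINT_INTERVAL (the DHT_DEFAULTS
+        dictionaries in DHT.py's tests show a complete example).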
+ + @type config: C{dictionary} + @param config: the configuration parameters for the DHT + @type cache_dir: C{string} + @param cache_dir: the directory to store all files in + """ + self.config = config + self.port = config['PORT'] + self.store = DB(os.path.join(cache_dir, 'khashmir.' + str(self.port) + '.db')) + self.node = self._loadSelfNode('', self.port) + self.table = KTable(self.node, config) + self.token_secrets = [newID()] + + # Start listening + self.udp = krpc.hostbroker(self, config) + self.udp.protocol = krpc.KRPC + self.listenport = reactor.listenUDP(self.port, self.udp) + + # Load the routing table and begin checkpointing + self._loadRoutingTable() + self.refreshTable(force = True) + self.next_checkpoint = reactor.callLater(60, self.checkpoint) + + def Node(self, id, host = None, port = None): + """Create a new node. + + @see: L{node.Node.__init__} + """ + n = self._Node(id, host, port) + n.table = self.table + n.conn = self.udp.connectionForAddr((n.host, n.port)) + return n + + def __del__(self): + """Stop listening for packets.""" + self.listenport.stopListening() + + def _loadSelfNode(self, host, port): + """Create this node, loading any previously saved one.""" + id = self.store.getSelfNode() + if not id: + id = newID() + return self._Node(id, host, port) + + def checkpoint(self): + """Perform some periodic maintenance operations.""" + # Create a new token secret + self.token_secrets.insert(0, newID()) + if len(self.token_secrets) > 3: + self.token_secrets.pop() + + # Save some parameters for reloading + self.store.saveSelfNode(self.node.id) + self.store.dumpRoutingTable(self.table.buckets) + + # DHT maintenance + self.store.expireValues(self.config['KEY_EXPIRE']) + self.refreshTable() + + self.next_checkpoint = reactor.callLater(randrange(int(self.config['CHECKPOINT_INTERVAL'] * .9), + int(self.config['CHECKPOINT_INTERVAL'] * 1.1)), + self.checkpoint) + + def _loadRoutingTable(self): + """Load the previous routing table nodes from the database. + + It's usually a good idea to call refreshTable(force = True) after + loading the table. + """ + nodes = self.store.getRoutingTable() + for rec in nodes: + n = self.Node(rec[0], rec[1], int(rec[2])) + self.table.insertNode(n, contacted = False) + + #{ Local interface + def addContact(self, host, port, callback=None, errback=None): + """Ping this node and add the contact info to the table on pong. + + @type host: C{string} + @param host: the IP address of the node to contact + @type port: C{int} + @param port:the port of the node to contact + @type callback: C{method} + @param callback: the method to call with the results, it must take 1 + parameter, the contact info returned by the node + (optional, defaults to doing nothing with the results) + @type errback: C{method} + @param errback: the method to call if an error occurs + (optional, defaults to calling the callback with None) + """ + n = self.Node(NULL_ID, host, port) + self.sendJoin(n, callback=callback, errback=errback) + + def findNode(self, id, callback, errback=None): + """Find the contact info for the K closest nodes in the global table. 
+
+        @type id: C{string}
+        @param id: the target ID to find the K closest nodes of
+        @type callback: C{method}
+        @param callback: the method to call with the results, it must take 1
+            parameter, the list of K closest nodes
+        @type errback: C{method}
+        @param errback: the method to call if an error occurs
+            (optional, defaults to doing nothing when an error occurs)
+        """
+        # Get K nodes out of local table/cache
+        nodes = self.table.findNodes(id)
+        d = Deferred()
+        if errback:
+            d.addCallbacks(callback, errback)
+        else:
+            d.addCallback(callback)
+
+        # If the target ID was found
+        if len(nodes) == 1 and nodes[0].id == id:
+            d.callback(nodes)
+        else:
+            # Start the finding nodes action
+            state = FindNode(self, id, d.callback, self.config)
+            reactor.callLater(0, state.goWithNodes, nodes)
+
+    def insertNode(self, node, contacted = True):
+        """Try to insert a node in our local table, pinging oldest contact if necessary.
+
+        If all you have is a host/port, then use L{addContact}, which calls this
+        method after receiving the PONG from the remote node. The reason for
+        the separation is we can't insert a node into the table without its
+        node ID. That means of course the node passed into this method needs
+        to be a properly formed Node object with a valid ID.
+
+        @type node: L{node.Node}
+        @param node: the new node to try and insert
+        @type contacted: C{boolean}
+        @param contacted: whether the new node is known to be good, i.e.
+            responded to a request (optional, defaults to True)
+        """
+        old = self.table.insertNode(node, contacted=contacted)
+        if (old and old.id != self.node.id and
+            (datetime.now() - old.lastSeen) >
+             timedelta(seconds=self.config['MIN_PING_INTERVAL'])):
+
+            def _staleNodeHandler(oldnode = old, newnode = node):
+                """The pinged node never responded, so replace it."""
+                self.table.replaceStaleNode(oldnode, newnode)
+
+            def _notStaleNodeHandler(dict, old=old):
+                """Got a pong from the old node, so update it."""
+                dict = dict['rsp']
+                if dict['id'] == old.id:
+                    self.table.justSeenNode(old.id)
+
+            # Bucket is full, check to see if old node is still available
+            df = old.ping(self.node.id)
+            df.addCallbacks(_notStaleNodeHandler, _staleNodeHandler)
+
+    def sendJoin(self, node, callback=None, errback=None):
+        """Join the DHT by pinging a bootstrap node.
+
+        @type node: L{node.Node}
+        @param node: the node to send the join to
+        @type callback: C{method}
+        @param callback: the method to call with the results, it must take 1
+            parameter, the contact info returned by the node
+            (optional, defaults to doing nothing with the results)
+        @type errback: C{method}
+        @param errback: the method to call if an error occurs
+            (optional, defaults to calling the callback with None)
+        """
+
+        def _pongHandler(dict, node=node, self=self, callback=callback):
+            """Node responded properly, callback with response."""
+            n = self.Node(dict['rsp']['id'], dict['_krpc_sender'][0], dict['_krpc_sender'][1])
+            self.insertNode(n)
+            if callback:
+                callback((dict['rsp']['ip_addr'], dict['rsp']['port']))
+
+        def _defaultPong(err, node=node, table=self.table, callback=callback, errback=errback):
+            """Error occurred, fail node and errback or callback with error."""
+            table.nodeFailed(node)
+            if errback:
+                errback()
+            elif callback:
+                callback(None)
+
+        df = node.join(self.node.id)
+        df.addCallbacks(_pongHandler, _defaultPong)
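How a node typically enters the DHT through the local interface above, in miniature (a sketch only: k stands for an already-constructed Khashmir instance, and the address is a placeholder):

    def joined(contact_info):
        # contact_info is (our_external_ip, our_port) as reported by the
        # bootstrap node, or None if the join failed
        print 'joined, external address:', contact_info

    k.addContact('127.0.0.1', 9977, callback=joined)
    # addContact pings the bootstrap node; on the pong, _pongHandler
    # inserts the responder into the routing table and fires the callback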
+    def findCloseNodes(self, callback=lambda a: None, errback = None):
+        """Perform a findNode on the ID one away from our own.
+
+        This will allow us to populate our table with nodes on our network
+        closest to our own. This is called as soon as we start up with an
+        empty table.
+
+        @type callback: C{method}
+        @param callback: the method to call with the results, it must take 1
+            parameter, the list of K closest nodes
+            (optional, defaults to doing nothing with the results)
+        @type errback: C{method}
+        @param errback: the method to call if an error occurs
+            (optional, defaults to doing nothing when an error occurs)
+        """
+        id = self.node.id[:-1] + chr((ord(self.node.id[-1]) + 1) % 256)
+        self.findNode(id, callback, errback)
+
+    def refreshTable(self, force = False):
+        """Check all the buckets for those that need refreshing.
+
+        @param force: refresh all buckets regardless of last bucket access time
+            (optional, defaults to False)
+        """
+        def callback(nodes):
+            pass
+
+        for bucket in self.table.buckets:
+            if force or (datetime.now() - bucket.lastAccessed >
+                         timedelta(seconds=self.config['BUCKET_STALENESS'])):
+                # Choose a random ID in the bucket and try and find it
+                id = newIDInRange(bucket.min, bucket.max)
+                self.findNode(id, callback)
+
+    def stats(self):
+        """Collect some statistics about the DHT.
+
+        @rtype: (C{int}, C{int})
+        @return: the number of contacts in our routing table, and the estimated
+            number of nodes in the entire DHT
+        """
+        num_contacts = reduce(lambda a, b: a + len(b.l), self.table.buckets, 0)
+        num_nodes = self.config['K'] * (2**(len(self.table.buckets) - 1))
+        return (num_contacts, num_nodes)
+
+    def shutdown(self):
+        """Close the port and cancel pending later calls."""
+        self.listenport.stopListening()
+        try:
+            self.next_checkpoint.cancel()
+        except:
+            pass
+        self.store.close()
+
+    #{ Remote interface
+    def krpc_ping(self, id, _krpc_sender):
+        """Pong with our ID.
+
+        @type id: C{string}
+        @param id: the node ID of the sender node
+        @type _krpc_sender: (C{string}, C{int})
+        @param _krpc_sender: the sender node's IP address and port
+        """
+        n = self.Node(id, _krpc_sender[0], _krpc_sender[1])
+        self.insertNode(n, contacted = False)
+
+        return {"id" : self.node.id}
+
+    def krpc_join(self, id, _krpc_sender):
+        """Add the node by responding with its address and port.
+
+        @type id: C{string}
+        @param id: the node ID of the sender node
+        @type _krpc_sender: (C{string}, C{int})
+        @param _krpc_sender: the sender node's IP address and port
+        """
+        n = self.Node(id, _krpc_sender[0], _krpc_sender[1])
+        self.insertNode(n, contacted = False)
+
+        return {"ip_addr" : _krpc_sender[0], "port" : _krpc_sender[1], "id" : self.node.id}
+
+    def krpc_find_node(self, target, id, _krpc_sender):
+        """Find the K closest nodes to the target in the local routing table.
+ + @type target: C{string} + @param target: the target ID to find nodes for + @type id: C{string} + @param id: the node ID of the sender node + @type _krpc_sender: (C{string}, C{int}) + @param _krpc_sender: the sender node's IP address and port + """ + n = self.Node(id, _krpc_sender[0], _krpc_sender[1]) + self.insertNode(n, contacted = False) + + nodes = self.table.findNodes(target) + nodes = map(lambda node: node.contactInfo(), nodes) + token = sha(self.token_secrets[0] + _krpc_sender[0]).digest() + return {"nodes" : nodes, "token" : token, "id" : self.node.id} + + +class KhashmirRead(KhashmirBase): + """The read-only Khashmir class, which can only retrieve (not store) key/value mappings.""" + + _Node = KNodeRead + + #{ Local interface + def findValue(self, key, callback, errback=None): + """Get the nodes that have values for the key from the global table. + + @type key: C{string} + @param key: the target key to find the values for + @type callback: C{method} + @param callback: the method to call with the results, it must take 1 + parameter, the list of nodes with values + @type errback: C{method} + @param errback: the method to call if an error occurs + (optional, defaults to doing nothing when an error occurs) + """ + # Get K nodes out of local table/cache + nodes = self.table.findNodes(key) + d = Deferred() + if errback: + d.addCallbacks(callback, errback) + else: + d.addCallback(callback) + + # Search for others starting with the locally found ones + state = FindValue(self, key, d.callback, self.config) + reactor.callLater(0, state.goWithNodes, nodes) + + def valueForKey(self, key, callback, searchlocal = True): + """Get the values found for key in global table. + + Callback will be called with a list of values for each peer that + returns unique values. The final callback will be an empty list. + + @type key: C{string} + @param key: the target key to get the values for + @type callback: C{method} + @param callback: the method to call with the results, it must take 2 + parameters: the key, and the values found + @type searchlocal: C{boolean} + @param searchlocal: whether to also look for any local values + """ + # Get any local values + if searchlocal: + l = self.store.retrieveValues(key) + if len(l) > 0: + reactor.callLater(0, callback, key, l) + else: + l = [] + + def _getValueForKey(nodes, key=key, local_values=l, response=callback, self=self): + """Use the found nodes to send requests for values to.""" + state = GetValue(self, key, local_values, self.config['RETRIEVE_VALUES'], response, self.config) + reactor.callLater(0, state.goWithNodes, nodes) + + # First lookup nodes that have values for the key + self.findValue(key, _getValueForKey) + + #{ Remote interface + def krpc_find_value(self, key, id, _krpc_sender): + """Find the number of values stored locally for the key, and the K closest nodes. + + @type key: C{string} + @param key: the target key to find the values and nodes for + @type id: C{string} + @param id: the node ID of the sender node + @type _krpc_sender: (C{string}, C{int}) + @param _krpc_sender: the sender node's IP address and port + """ + n = self.Node(id, _krpc_sender[0], _krpc_sender[1]) + self.insertNode(n, contacted = False) + + nodes = self.table.findNodes(key) + nodes = map(lambda node: node.contactInfo(), nodes) + num_values = self.store.countValues(key) + return {'nodes' : nodes, 'num' : num_values, "id": self.node.id} + + def krpc_get_value(self, key, num, id, _krpc_sender): + """Retrieve the values stored locally for the key. 
+ + @type key: C{string} + @param key: the target key to retrieve the values for + @type num: C{int} + @param num: the maximum number of values to retrieve, or 0 to + retrieve all of them + @type id: C{string} + @param id: the node ID of the sender node + @type _krpc_sender: (C{string}, C{int}) + @param _krpc_sender: the sender node's IP address and port + """ + n = self.Node(id, _krpc_sender[0], _krpc_sender[1]) + self.insertNode(n, contacted = False) + + l = self.store.retrieveValues(key) + if num == 0 or num >= len(l): + return {'values' : l, "id": self.node.id} + else: + shuffle(l) + return {'values' : l[:num], "id": self.node.id} + + +class KhashmirWrite(KhashmirRead): + """The read-write Khashmir class, which can store and retrieve key/value mappings.""" + + _Node = KNodeWrite + + #{ Local interface + def storeValueForKey(self, key, value, callback=None): + """Stores the value for the key in the global table. + + No status in this implementation, peers respond but don't indicate + status of storing values. + + @type key: C{string} + @param key: the target key to store the value for + @type value: C{string} + @param value: the value to store with the key + @type callback: C{method} + @param callback: the method to call with the results, it must take 3 + parameters: the key, the value stored, and the result of the store + (optional, defaults to doing nothing with the results) + """ + def _storeValueForKey(nodes, key=key, value=value, response=callback, self=self): + """Use the returned K closest nodes to store the key at.""" + if not response: + def _storedValueHandler(key, value, sender): + """Default callback that does nothing.""" + pass + response = _storedValueHandler + action = StoreValue(self, key, value, self.config['STORE_REDUNDANCY'], response, self.config) + reactor.callLater(0, action.goWithNodes, nodes) + + # First find the K closest nodes to operate on. + self.findNode(key, _storeValueForKey) + + #{ Remote interface + def krpc_store_value(self, key, value, token, id, _krpc_sender): + """Store the value locally with the key. 
+ + @type key: C{string} + @param key: the target key to store the value for + @type value: C{string} + @param value: the value to store with the key + @param token: the token to confirm that this peer contacted us previously + @type id: C{string} + @param id: the node ID of the sender node + @type _krpc_sender: (C{string}, C{int}) + @param _krpc_sender: the sender node's IP address and port + """ + n = self.Node(id, _krpc_sender[0], _krpc_sender[1]) + self.insertNode(n, contacted = False) + for secret in self.token_secrets: + this_token = sha(secret + _krpc_sender[0]).digest() + if token == this_token: + self.store.storeValue(key, value) + return {"id" : self.node.id} + raise krpc.KrpcError, (krpc.KRPC_ERROR_INVALID_TOKEN, 'token is invalid, do a find_nodes to get a fresh one') + + +class Khashmir(KhashmirWrite): + """The default Khashmir class (currently the read-write L{KhashmirWrite}).""" + _Node = KNodeWrite + + +class SimpleTests(unittest.TestCase): + + timeout = 10 + DHT_DEFAULTS = {'PORT': 9977, 'K': 8, 'HASH_LENGTH': 160, + 'CHECKPOINT_INTERVAL': 300, 'CONCURRENT_REQS': 4, + 'STORE_REDUNDANCY': 3, 'RETRIEVE_VALUES': -10000, + 'MAX_FAILURES': 3, + 'MIN_PING_INTERVAL': 900,'BUCKET_STALENESS': 3600, + 'KEY_EXPIRE': 3600, 'SPEW': False, } + + def setUp(self): + d = self.DHT_DEFAULTS.copy() + d['PORT'] = 4044 + self.a = Khashmir(d) + d = self.DHT_DEFAULTS.copy() + d['PORT'] = 4045 + self.b = Khashmir(d) + + def tearDown(self): + self.a.shutdown() + self.b.shutdown() + os.unlink(self.a.store.db) + os.unlink(self.b.store.db) + + def testAddContact(self): + self.failUnlessEqual(len(self.a.table.buckets), 1) + self.failUnlessEqual(len(self.a.table.buckets[0].l), 0) + + self.failUnlessEqual(len(self.b.table.buckets), 1) + self.failUnlessEqual(len(self.b.table.buckets[0].l), 0) + + self.a.addContact('127.0.0.1', 4045) + reactor.iterate() + reactor.iterate() + reactor.iterate() + reactor.iterate() + + self.failUnlessEqual(len(self.a.table.buckets), 1) + self.failUnlessEqual(len(self.a.table.buckets[0].l), 1) + self.failUnlessEqual(len(self.b.table.buckets), 1) + self.failUnlessEqual(len(self.b.table.buckets[0].l), 1) + + def testStoreRetrieve(self): + self.a.addContact('127.0.0.1', 4045) + reactor.iterate() + reactor.iterate() + reactor.iterate() + reactor.iterate() + self.got = 0 + self.a.storeValueForKey(sha('foo').digest(), 'foobar') + reactor.iterate() + reactor.iterate() + reactor.iterate() + reactor.iterate() + reactor.iterate() + reactor.iterate() + self.a.valueForKey(sha('foo').digest(), self._cb) + reactor.iterate() + reactor.iterate() + reactor.iterate() + reactor.iterate() + reactor.iterate() + reactor.iterate() + reactor.iterate() + + def _cb(self, key, val): + if not val: + self.failUnlessEqual(self.got, 1) + elif 'foobar' in val: + self.got = 1 + + +class MultiTest(unittest.TestCase): + + timeout = 30 + num = 20 + DHT_DEFAULTS = {'PORT': 9977, 'K': 8, 'HASH_LENGTH': 160, + 'CHECKPOINT_INTERVAL': 300, 'CONCURRENT_REQS': 4, + 'STORE_REDUNDANCY': 3, 'RETRIEVE_VALUES': -10000, + 'MAX_FAILURES': 3, + 'MIN_PING_INTERVAL': 900,'BUCKET_STALENESS': 3600, + 'KEY_EXPIRE': 3600, 'SPEW': False, } + + def _done(self, val): + self.done = 1 + + def setUp(self): + self.l = [] + self.startport = 4088 + for i in range(self.num): + d = self.DHT_DEFAULTS.copy() + d['PORT'] = self.startport + i + self.l.append(Khashmir(d)) + reactor.iterate() + reactor.iterate() + + for i in self.l: + i.addContact('127.0.0.1', self.l[randrange(0,self.num)].port) + i.addContact('127.0.0.1', 
self.l[randrange(0,self.num)].port) + i.addContact('127.0.0.1', self.l[randrange(0,self.num)].port) + reactor.iterate() + reactor.iterate() + reactor.iterate() + + for i in self.l: + self.done = 0 + i.findCloseNodes(self._done) + while not self.done: + reactor.iterate() + for i in self.l: + self.done = 0 + i.findCloseNodes(self._done) + while not self.done: + reactor.iterate() + + def tearDown(self): + for i in self.l: + i.shutdown() + os.unlink(i.store.db) + + reactor.iterate() + + def testStoreRetrieve(self): + for i in range(10): + K = newID() + V = newID() + + for a in range(3): + self.done = 0 + def _scb(key, value, result): + self.done = 1 + self.l[randrange(0, self.num)].storeValueForKey(K, V, _scb) + while not self.done: + reactor.iterate() + + + def _rcb(key, val): + if not val: + self.done = 1 + self.failUnlessEqual(self.got, 1) + elif V in val: + self.got = 1 + for x in range(3): + self.got = 0 + self.done = 0 + self.l[randrange(0, self.num)].valueForKey(K, _rcb) + while not self.done: + reactor.iterate() diff --git a/apt_p2p_Khashmir/knode.py b/apt_p2p_Khashmir/knode.py new file mode 100644 index 0000000..e7fb6b3 --- /dev/null +++ b/apt_p2p_Khashmir/knode.py @@ -0,0 +1,78 @@ +## Copyright 2002-2004 Andrew Loewenstern, All Rights Reserved +# see LICENSE.txt for license information + +"""Represents a khashmir node in the DHT.""" + +from twisted.python import log + +from node import Node, NULL_ID + +class KNodeBase(Node): + """A basic node that can only be pinged and help find other nodes.""" + + def checkSender(self, dict): + """Check the sender's info to make sure it meets expectations.""" + try: + senderid = dict['rsp']['id'] + except KeyError: + log.msg("No peer id in response") + raise Exception, "No peer id in response." + else: + if self.id != NULL_ID and senderid != self.id: + log.msg("Got response from different node than expected.") + self.table.invalidateNode(self) + + return dict + + def errBack(self, err): + """Log an error that has occurred.""" + log.err(err) + return err + + def ping(self, id): + """Ping the node.""" + df = self.conn.sendRequest('ping', {"id":id}) + df.addErrback(self.errBack) + df.addCallback(self.checkSender) + return df + + def join(self, id): + """Use the node to bootstrap into the system.""" + df = self.conn.sendRequest('join', {"id":id}) + df.addErrback(self.errBack) + df.addCallback(self.checkSender) + return df + + def findNode(self, id, target): + """Request the nearest nodes to the target that the node knows about.""" + df = self.conn.sendRequest('find_node', {"target" : target, "id": id}) + df.addErrback(self.errBack) + df.addCallback(self.checkSender) + return df + +class KNodeRead(KNodeBase): + """More advanced node that can also find and send values.""" + + def findValue(self, id, key): + """Request the nearest nodes to the key that the node knows about.""" + df = self.conn.sendRequest('find_value', {"key" : key, "id" : id}) + df.addErrback(self.errBack) + df.addCallback(self.checkSender) + return df + + def getValue(self, id, key, num): + """Request the values that the node has for the key.""" + df = self.conn.sendRequest('get_value', {"key" : key, "num": num, "id" : id}) + df.addErrback(self.errBack) + df.addCallback(self.checkSender) + return df + +class KNodeWrite(KNodeRead): + """Most advanced node that can also store values.""" + + def storeValue(self, id, key, value, token): + """Store a value in the node.""" + df = self.conn.sendRequest('store_value', {"key" : key, "value" : value, "token" : token, "id": id}) + 
df.addErrback(self.errBack) + df.addCallback(self.checkSender) + return df diff --git a/apt_p2p_Khashmir/krpc.py b/apt_p2p_Khashmir/krpc.py new file mode 100644 index 0000000..a4fbacc --- /dev/null +++ b/apt_p2p_Khashmir/krpc.py @@ -0,0 +1,561 @@ +## Copyright 2002-2003 Andrew Loewenstern, All Rights Reserved +# see LICENSE.txt for license information + +"""The KRPC communication protocol implementation. + +@var KRPC_TIMEOUT: the number of seconds after which requests timeout +@var UDP_PACKET_LIMIT: the maximum number of bytes that can be sent in a + UDP packet without fragmentation + +@var KRPC_ERROR: the code for a generic error +@var KRPC_ERROR_SERVER_ERROR: the code for a server error +@var KRPC_ERROR_MALFORMED_PACKET: the code for a malformed packet error +@var KRPC_ERROR_METHOD_UNKNOWN: the code for a method unknown error +@var KRPC_ERROR_MALFORMED_REQUEST: the code for a malformed request error +@var KRPC_ERROR_INVALID_TOKEN: the code for an invalid token error +@var KRPC_ERROR_RESPONSE_TOO_LONG: the code for a response too long error + +@var KRPC_ERROR_INTERNAL: the code for an internal error +@var KRPC_ERROR_RECEIVED_UNKNOWN: the code for an unknown message type error +@var KRPC_ERROR_TIMEOUT: the code for a timeout error +@var KRPC_ERROR_PROTOCOL_STOPPED: the code for a stopped protocol error + +@var TID: the identifier for the transaction ID +@var REQ: the identifier for a request packet +@var RSP: the identifier for a response packet +@var TYP: the identifier for the type of packet +@var ARG: the identifier for the argument to the request +@var ERR: the identifier for an error packet + +@group Remote node error codes: KRPC_ERROR, KRPC_ERROR_SERVER_ERROR, + KRPC_ERROR_MALFORMED_PACKET, KRPC_ERROR_METHOD_UNKNOWN, + KRPC_ERROR_MALFORMED_REQUEST, KRPC_ERROR_INVALID_TOKEN, + KRPC_ERROR_RESPONSE_TOO_LONG +@group Local node error codes: KRPC_ERROR_INTERNAL, KRPC_ERROR_RECEIVED_UNKNOWN, + KRPC_ERROR_TIMEOUT, KRPC_ERROR_PROTOCOL_STOPPED +@group Command identifiers: TID, REQ, RSP, TYP, ARG, ERR + +""" + +from bencode import bencode, bdecode +from time import asctime +from math import ceil + +from twisted.internet.defer import Deferred +from twisted.internet import protocol, reactor +from twisted.python import log +from twisted.trial import unittest + +from khash import newID + +KRPC_TIMEOUT = 20 +UDP_PACKET_LIMIT = 1472 + +# Remote node errors +KRPC_ERROR = 200 +KRPC_ERROR_SERVER_ERROR = 201 +KRPC_ERROR_MALFORMED_PACKET = 202 +KRPC_ERROR_METHOD_UNKNOWN = 203 +KRPC_ERROR_MALFORMED_REQUEST = 204 +KRPC_ERROR_INVALID_TOKEN = 205 +KRPC_ERROR_RESPONSE_TOO_LONG = 206 + +# Local errors +KRPC_ERROR_INTERNAL = 100 +KRPC_ERROR_RECEIVED_UNKNOWN = 101 +KRPC_ERROR_TIMEOUT = 102 +KRPC_ERROR_PROTOCOL_STOPPED = 103 + +# commands +TID = 't' +REQ = 'q' +RSP = 'r' +TYP = 'y' +ARG = 'a' +ERR = 'e' + +class KrpcError(Exception): + """An error occurred in the KRPC protocol.""" + pass + +def verifyMessage(msg): + """Check received message for corruption and errors. 
+ + @type msg: C{dictionary} + @param msg: the dictionary of information received on the connection + @raise KrpcError: if the message is corrupt + """ + + if type(msg) != dict: + raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "not a dictionary") + if TYP not in msg: + raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "no message type") + if msg[TYP] == REQ: + if REQ not in msg: + raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "request type not specified") + if type(msg[REQ]) != str: + raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "request type is not a string") + if ARG not in msg: + raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "no arguments for request") + if type(msg[ARG]) != dict: + raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "arguments for request are not in a dictionary") + elif msg[TYP] == RSP: + if RSP not in msg: + raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "response not specified") + if type(msg[RSP]) != dict: + raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "response is not a dictionary") + elif msg[TYP] == ERR: + if ERR not in msg: + raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "error not specified") + if type(msg[ERR]) != list: + raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "error is not a list") + if len(msg[ERR]) != 2: + raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "error is not a 2-element list") + if type(msg[ERR][0]) not in (int, long): + raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "error number is not a number") + if type(msg[ERR][1]) != str: + raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "error string is not a string") +# else: +# raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "unknown message type") + if TID not in msg: + raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "no transaction ID specified") + if type(msg[TID]) != str: + raise KrpcError, (KRPC_ERROR_MALFORMED_PACKET, "transaction id is not a string") + +class hostbroker(protocol.DatagramProtocol): + """The factory for the KRPC protocol. + + @type server: L{khashmir.Khashmir} + @ivar server: the main Khashmir program + @type config: C{dictionary} + @ivar config: the configuration parameters for the DHT + @type connections: C{dictionary} + @ivar connections: all the connections that have ever been made to the + protocol, keys are IP address and port pairs, values are L{KRPC} + protocols for the addresses + @ivar protocol: the protocol to use to handle incoming connections + (added externally) + @type addr: (C{string}, C{int}) + @ivar addr: the IP address and port of this node + """ + + def __init__(self, server, config): + """Initialize the factory. + + @type server: L{khashmir.Khashmir} + @param server: the main DHT program + @type config: C{dictionary} + @param config: the configuration parameters for the DHT + """ + self.server = server + self.config = config + # this should be changed to storage that drops old entries + self.connections = {} + + def datagramReceived(self, datagram, addr): + """Optionally create a new protocol object, and handle the new datagram. + + @type datagram: C{string} + @param datagram: the data received from the transport. + @type addr: (C{string}, C{int}) + @param addr: source IP address and port of datagram. + """ + c = self.connectionForAddr(addr) + c.datagramReceived(datagram, addr) + #if c.idle(): + # del self.connections[addr] + + def connectionForAddr(self, addr): + """Get a protocol object for the source. + + @type addr: (C{string}, C{int}) + @param addr: source IP address and port of datagram. 
+ """ + # Don't connect to ourself + if addr == self.addr: + raise KrcpError + + # Create a new protocol object if necessary + if not self.connections.has_key(addr): + conn = self.protocol(addr, self.server, self.transport, self.config['SPEW']) + self.connections[addr] = conn + else: + conn = self.connections[addr] + return conn + + def makeConnection(self, transport): + """Make a connection to a transport and save our address.""" + protocol.DatagramProtocol.makeConnection(self, transport) + tup = transport.getHost() + self.addr = (tup.host, tup.port) + + def stopProtocol(self): + """Stop all the open connections.""" + for conn in self.connections.values(): + conn.stop() + protocol.DatagramProtocol.stopProtocol(self) + +class KRPC: + """The KRPC protocol implementation. + + @ivar transport: the transport to use for the protocol + @type factory: L{khashmir.Khashmir} + @ivar factory: the main Khashmir program + @type addr: (C{string}, C{int}) + @ivar addr: the IP address and port of the source node + @type noisy: C{boolean} + @ivar noisy: whether to log additional details of the protocol + @type tids: C{dictionary} + @ivar tids: the transaction IDs outstanding for requests, keys are the + transaction ID of the request, values are the deferreds to call with + the results + @type stopped: C{boolean} + @ivar stopped: whether the protocol has been stopped + """ + + def __init__(self, addr, server, transport, spew = False): + """Initialize the protocol. + + @type addr: (C{string}, C{int}) + @param addr: the IP address and port of the source node + @type server: L{khashmir.Khashmir} + @param server: the main Khashmir program + @param transport: the transport to use for the protocol + @type spew: C{boolean} + @param spew: whether to log additional details of the protocol + (optional, defaults to False) + """ + self.transport = transport + self.factory = server + self.addr = addr + self.noisy = spew + self.tids = {} + self.stopped = False + + def datagramReceived(self, data, addr): + """Process the new datagram. + + @type data: C{string} + @param data: the data received from the transport. + @type addr: (C{string}, C{int}) + @param addr: source IP address and port of datagram. 
+ """ + if self.stopped: + if self.noisy: + log.msg("stopped, dropping message from %r: %s" % (addr, data)) + + # Bdecode the message + try: + msg = bdecode(data) + except Exception, e: + if self.noisy: + log.msg("krpc bdecode error: ") + log.err(e) + return + + # Make sure the remote node isn't trying anything funny + try: + verifyMessage(msg) + except Exception, e: + log.msg("krpc message verification error: ") + log.err(e) + return + + if self.noisy: + log.msg("%d received from %r: %s" % (self.factory.port, addr, msg)) + + # Process it based on its type + if msg[TYP] == REQ: + ilen = len(data) + + # Requests are handled by the factory + f = getattr(self.factory ,"krpc_" + msg[REQ], None) + msg[ARG]['_krpc_sender'] = self.addr + if f and callable(f): + try: + ret = f(*(), **msg[ARG]) + except KrpcError, e: + log.msg('Got a Krpc error while running: krpc_%s' % msg[REQ]) + log.err(e) + olen = self._sendResponse(addr, msg[TID], ERR, [e[0], e[1]]) + except TypeError, e: + log.msg('Got a malformed request for: krpc_%s' % msg[REQ]) + log.err(e) + olen = self._sendResponse(addr, msg[TID], ERR, + [KRPC_ERROR_MALFORMED_REQUEST, str(e)]) + except Exception, e: + log.msg('Got an unknown error while running: krpc_%s' % msg[REQ]) + log.err(e) + olen = self._sendResponse(addr, msg[TID], ERR, + [KRPC_ERROR_SERVER_ERROR, str(e)]) + else: + olen = self._sendResponse(addr, msg[TID], RSP, ret) + else: + # Request for unknown method + log.msg("ERROR: don't know about method %s" % msg[REQ]) + olen = self._sendResponse(addr, msg[TID], ERR, + [KRPC_ERROR_METHOD_UNKNOWN, "unknown method "+str(msg[REQ])]) + if self.noisy: + log.msg("%s >>> %s - %s %s %s" % (addr, self.factory.node.port, + ilen, msg[REQ], olen)) + elif msg[TYP] == RSP: + # Responses get processed by their TID's deferred + if self.tids.has_key(msg[TID]): + df = self.tids[msg[TID]] + # callback + del(self.tids[msg[TID]]) + df.callback({'rsp' : msg[RSP], '_krpc_sender': addr}) + else: + # no tid, this transaction timed out already... + if self.noisy: + log.msg('timeout: %r' % msg[RSP]['id']) + elif msg[TYP] == ERR: + # Errors get processed by their TID's deferred's errback + if self.tids.has_key(msg[TID]): + df = self.tids[msg[TID]] + del(self.tids[msg[TID]]) + # callback + df.errback(KrpcError(*msg[ERR])) + else: + # day late and dollar short, just log it + log.msg("Got an error for an unknown request: %r" % (msg[ERR], )) + pass + else: + # Received an unknown message type + if self.noisy: + log.msg("unknown message type: %r" % msg) + if msg[TID] in self.tids: + df = self.tids[msg[TID]] + del(self.tids[msg[TID]]) + # callback + df.errback(KrpcError(KRPC_ERROR_RECEIVED_UNKNOWN, + "Received an unknown message type: %r" % msg[TYP])) + + def _sendResponse(self, addr, tid, msgType, response): + """Helper function for sending responses to nodes. + + @type addr: (C{string}, C{int}) + @param addr: source IP address and port of datagram. + @param tid: the transaction ID of the request + @param msgType: the type of message to respond with + @param response: the arguments for the response + """ + if not response: + response = {} + + try: + # Create the response message + msg = {TID : tid, TYP : msgType, msgType : response} + + if self.noisy: + log.msg("%d responding to %r: %s" % (self.factory.port, addr, msg)) + + out = bencode(msg) + + # Make sure its not too long + if len(out) > UDP_PACKET_LIMIT: + # Can we remove some values to shorten it? 
+    def _sendResponse(self, addr, tid, msgType, response):
+        """Helper function for sending responses to nodes.
+
+        @type addr: (C{string}, C{int})
+        @param addr: source IP address and port of datagram.
+        @param tid: the transaction ID of the request
+        @param msgType: the type of message to respond with
+        @param response: the arguments for the response
+        """
+        if not response:
+            response = {}
+
+        try:
+            # Create the response message
+            msg = {TID : tid, TYP : msgType, msgType : response}
+
+            if self.noisy:
+                log.msg("%d responding to %r: %s" % (self.factory.port, addr, msg))
+
+            out = bencode(msg)
+
+            # Make sure it's not too long
+            if len(out) > UDP_PACKET_LIMIT:
+                # Can we remove some values to shorten it?
+                if 'values' in response:
+                    # Save the original list of values
+                    orig_values = response['values']
+                    len_orig_values = len(bencode(orig_values))
+
+                    # Calculate the maximum value length possible
+                    max_len_values = len_orig_values - (len(out) - UDP_PACKET_LIMIT)
+                    assert max_len_values > 0
+
+                    # Start with a calculation of how many values should be included
+                    # (assumes all values are the same length)
+                    per_value = (float(len_orig_values) - 2.0) / float(len(orig_values))
+                    num_values = len(orig_values) - int(ceil(float(len(out) - UDP_PACKET_LIMIT) / per_value))
+
+                    # Do a linear search for the actual maximum number possible
+                    bencoded_values = len(bencode(orig_values[:num_values]))
+                    while bencoded_values < max_len_values and num_values + 1 < len(orig_values):
+                        bencoded_values += len(bencode(orig_values[num_values]))
+                        num_values += 1
+                    while bencoded_values > max_len_values and num_values > 0:
+                        num_values -= 1
+                        bencoded_values -= len(bencode(orig_values[num_values]))
+                    assert num_values > 0
+
+                    # Encode the result
+                    response['values'] = orig_values[:num_values]
+                    out = bencode(msg)
+                    assert len(out) < UDP_PACKET_LIMIT
+                    log.msg('Shortened a long packet from %d to %d values, new packet length: %d' %
+                            (len(orig_values), num_values, len(out)))
+                else:
+                    # Too long a response, send an error
+                    log.msg('Could not send response, too long: %d bytes' % len(out))
+                    msg = {TID : tid, TYP : ERR, ERR : [KRPC_ERROR_RESPONSE_TOO_LONG, "response was %d bytes" % len(out)]}
+                    out = bencode(msg)
+
+        except Exception, e:
+            # Unknown error, send an error message
+            msg = {TID : tid, TYP : ERR, ERR : [KRPC_ERROR_SERVER_ERROR, "unknown error sending response: %s" % str(e)]}
+            out = bencode(msg)
+
+        self.transport.write(out, addr)
+        return len(out)
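sendRequest below pairs every outgoing request with a KRPC_TIMEOUT-second timer; the same pattern in miniature, outside the class (illustrative only):

    from twisted.internet import reactor
    from twisted.internet.defer import Deferred

    d = Deferred()
    later = reactor.callLater(20, d.errback, Exception('timeout'))

    def dropTimeOut(result):
        # a response (or error) arrived first, so cancel the timer
        if later.active():
            later.cancel()
        return result
    d.addBoth(dropTimeOut)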
+    def sendRequest(self, method, args):
+        """Send a request to the remote node.
+
+        @type method: C{string}
+        @param method: the method name to call on the remote node
+        @param args: the arguments to send to the remote node's method
+        """
+        if self.stopped:
+            raise KrpcError, (KRPC_ERROR_PROTOCOL_STOPPED, "cannot send, connection has been stopped")
+
+        # Create the request message
+        msg = {TID : newID(), TYP : REQ, REQ : method, ARG : args}
+        if self.noisy:
+            log.msg("%d sending to %r: %s" % (self.factory.port, self.addr, msg))
+        data = bencode(msg)
+
+        # Create the deferred and save it with the TID
+        d = Deferred()
+        self.tids[msg[TID]] = d
+
+        # Schedule a later timeout call
+        def timeOut(tids = self.tids, id = msg[TID], method = method, addr = self.addr):
+            """Call the deferred's errback if a timeout occurs."""
+            if tids.has_key(id):
+                df = tids[id]
+                del(tids[id])
+                df.errback(KrpcError(KRPC_ERROR_TIMEOUT, "timeout waiting for '%s' from %r" % (method, addr)))
+        later = reactor.callLater(KRPC_TIMEOUT, timeOut)
+
+        # Cancel the timeout call if a response is received
+        def dropTimeOut(dict, later_call = later):
+            """Cancel the timeout call when a response is received."""
+            if later_call.active():
+                later_call.cancel()
+            return dict
+        d.addBoth(dropTimeOut)
+
+        self.transport.write(data, self.addr)
+        return d
+
+    def stop(self):
+        """Timeout all pending requests."""
+        for df in self.tids.values():
+            df.errback(KrpcError(KRPC_ERROR_PROTOCOL_STOPPED, 'connection has been stopped while waiting for response'))
+        self.tids = {}
+        self.stopped = True
+
+#{ For testing the KRPC protocol
+def connectionForAddr(host, port):
+    return host
+
+class Receiver(protocol.Factory):
+    protocol = KRPC
+    def __init__(self):
+        self.buf = []
+    def krpc_store(self, msg, _krpc_sender):
+        self.buf += [msg]
+        return {}
+    def krpc_echo(self, msg, _krpc_sender):
+        return {'msg': msg}
+    def krpc_values(self, length, num, _krpc_sender):
+        return {'values': ['1'*length]*num}

def make(port):
+    af = Receiver()
+    a = hostbroker(af, {'SPEW': False})
+    a.protocol = KRPC
+    p = reactor.listenUDP(port, a)
+    return af, a, p

class KRPCTests(unittest.TestCase):
+    timeout = 2
+
+    def setUp(self):
+        self.af, self.a, self.ap = make(1180)
+        self.bf, self.b, self.bp = make(1181)
+
+    def tearDown(self):
+        self.ap.stopListening()
+        self.bp.stopListening()
+
+    def bufEquals(self, result, value):
+        self.failUnlessEqual(self.bf.buf, value)
+
+    def testSimpleMessage(self):
+        d = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('store', {'msg' : "This is a test."})
+        d.addCallback(self.bufEquals, ["This is a test."])
+        return d
+
+    def testMessageBlast(self):
+        for i in range(100):
+            d = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('store', {'msg' : "This is a test."})
+        d.addCallback(self.bufEquals, ["This is a test."] * 100)
+        return d
+
+    def testEcho(self):
+        df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is a test."})
+        df.addCallback(self.gotMsg, "This is a test.")
+        return df
+
+    def gotMsg(self, dict, should_be):
+        _krpc_sender = dict['_krpc_sender']
+        msg = dict['rsp']
+        self.failUnlessEqual(msg['msg'], should_be)
+
+    def testManyEcho(self):
+        for i in xrange(100):
+            df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is a test."})
+            df.addCallback(self.gotMsg, "This is a test.")
+        return df
+
+    def testMultiEcho(self):
+        df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is a test."})
+        df.addCallback(self.gotMsg, "This is a test.")
+
+        df = 
self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is another test."}) + df.addCallback(self.gotMsg, "This is another test.") + + df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is yet another test."}) + df.addCallback(self.gotMsg, "This is yet another test.") + + return df + + def testEchoReset(self): + df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is a test."}) + df.addCallback(self.gotMsg, "This is a test.") + + df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is another test."}) + df.addCallback(self.gotMsg, "This is another test.") + df.addCallback(self.echoReset) + return df + + def echoReset(self, dict): + del(self.a.connections[('127.0.0.1', 1181)]) + df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is yet another test."}) + df.addCallback(self.gotMsg, "This is yet another test.") + return df + + def testUnknownMeth(self): + df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('blahblah', {'msg' : "This is a test."}) + df.addBoth(self.gotErr, KRPC_ERROR_METHOD_UNKNOWN) + return df + + def testMalformedRequest(self): + df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('echo', {'msg' : "This is a test.", 'foo': 'bar'}) + df.addBoth(self.gotErr, KRPC_ERROR_MALFORMED_REQUEST) + return df + + def gotErr(self, err, should_be): + self.failUnlessEqual(err.value[0], should_be) + + def testLongPackets(self): + df = self.a.connectionForAddr(('127.0.0.1', 1181)).sendRequest('values', {'length' : 1, 'num': 2000}) + df.addCallback(self.gotLongRsp) + return df + + def gotLongRsp(self, dict): + # Not quite accurate, but good enough + self.failUnless(len(bencode(dict))-10 < UDP_PACKET_LIMIT) + \ No newline at end of file diff --git a/apt_p2p_Khashmir/ktable.py b/apt_p2p_Khashmir/ktable.py new file mode 100644 index 0000000..fb0c371 --- /dev/null +++ b/apt_p2p_Khashmir/ktable.py @@ -0,0 +1,335 @@ +## Copyright 2002-2003 Andrew Loewenstern, All Rights Reserved +# see LICENSE.txt for license information + +"""The routing table and buckets for a kademlia-like DHT.""" + +from datetime import datetime +from bisect import bisect_left + +from twisted.python import log +from twisted.trial import unittest + +import khash +from node import Node, NULL_ID + +class KTable: + """Local routing table for a kademlia-like distributed hash table. + + @type node: L{node.Node} + @ivar node: the local node + @type config: C{dictionary} + @ivar config: the configuration parameters for the DHT + @type buckets: C{list} of L{KBucket} + @ivar buckets: the buckets of nodes in the routing table + """ + + def __init__(self, node, config): + """Initialize the first empty bucket of everything. + + @type node: L{node.Node} + @param node: the local node + @type config: C{dictionary} + @param config: the configuration parameters for the DHT + """ + # this is the root node, a.k.a. US! + assert node.id != NULL_ID + self.node = node + self.config = config + self.buckets = [KBucket([], 0L, 2L**self.config['HASH_LENGTH'])] + + def _bucketIndexForInt(self, num): + """Find the index of the bucket that should hold the node's ID number.""" + return bisect_left(self.buckets, num) + + def findNodes(self, id): + """Find the K nodes in our own local table closest to the ID. 
+
+        @type id: C{string}, C{int}, or L{node.Node}
+        @param id: the ID to find nodes that are close to
+        @raise TypeError: if id does not properly identify an ID
+        """
+
+        # Get the ID number from the input
+        if isinstance(id, str):
+            num = khash.intify(id)
+        elif isinstance(id, Node):
+            num = id.num
+        elif isinstance(id, int) or isinstance(id, long):
+            num = id
+        else:
+            raise TypeError, "findNodes requires an int, string, or Node"
+
+        nodes = []
+        i = self._bucketIndexForInt(num)
+
+        # If this node is already in our table then return it
+        try:
+            index = self.buckets[i].l.index(num)
+        except ValueError:
+            pass
+        else:
+            return [self.buckets[i].l[index]]
+
+        # Don't have the node, get the K closest nodes from the appropriate bucket
+        nodes = nodes + self.buckets[i].l
+
+        # Make sure we have enough
+        if len(nodes) < self.config['K']:
+            # Look in adjoining buckets for nodes
+            min = i - 1
+            max = i + 1
+            while len(nodes) < self.config['K'] and (min >= 0 or max < len(self.buckets)):
+                # Add the adjoining buckets' nodes to the list
+                if min >= 0:
+                    nodes = nodes + self.buckets[min].l
+                if max < len(self.buckets):
+                    nodes = nodes + self.buckets[max].l
+                min = min - 1
+                max = max + 1
+
+        # Sort the found nodes by proximity to the id and return the closest K
+        nodes.sort(lambda a, b, num=num: cmp(num ^ a.num, num ^ b.num))
+        return nodes[:self.config['K']]
+
+    def _splitBucket(self, a):
+        """Split a bucket in two.
+
+        @type a: L{KBucket}
+        @param a: the bucket to split
+        """
+        # Create a new bucket with half the (upper) range of the current bucket
+        diff = (a.max - a.min) / 2
+        b = KBucket([], a.max - diff, a.max)
+        self.buckets.insert(self.buckets.index(a.min) + 1, b)
+
+        # Reduce the input bucket's (upper) range
+        a.max = a.max - diff
+
+        # Transfer nodes to the new bucket
+        for anode in a.l[:]:
+            if anode.num >= a.max:
+                a.l.remove(anode)
+                b.l.append(anode)
+
+    def replaceStaleNode(self, stale, new = None):
+        """Replace a stale node in a bucket with a new one.
+
+        This is used by clients to replace a node returned by insertNode after
+        it fails to respond to a ping.
+
+        @type stale: L{node.Node}
+        @param stale: the stale node to remove from the bucket
+        @type new: L{node.Node}
+        @param new: the new node to add in its place (optional, defaults to
+            not adding any node in the old node's place)
+        """
+        # Find the stale node's bucket
+        i = self._bucketIndexForInt(stale.num)
+        try:
+            it = self.buckets[i].l.index(stale.num)
+        except ValueError:
+            return
+
+        # Remove the stale node and insert the new one
+        del(self.buckets[i].l[it])
+        if new:
+            self.buckets[i].l.append(new)
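The split in _splitBucket above always halves a bucket's numeric range. With a toy 4-bit hash space instead of 160 bits, the arithmetic looks like this (illustrative only):

    bucket = KBucket([], 0L, 16L)                     # covers IDs 0..15
    diff = (bucket.max - bucket.min) / 2
    new = KBucket([], bucket.max - diff, bucket.max)  # covers 8..15
    bucket.max = bucket.max - diff                    # now covers 0..7
    # nodes with num >= 8 then migrate to the new bucket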
+    def insertNode(self, node, contacted = True):
+        """Try to insert a node in the routing table.
+
+        This inserts the node, returning None if successful, otherwise returns
+        the oldest node in the bucket if it's full. The caller is then
+        responsible for pinging the returned node and calling replaceStaleNode
+        if it doesn't respond. contacted means that yes, we contacted THEM and
+        we know the node is reachable.
+
+        @type node: L{node.Node}
+        @param node: the new node to try and insert
+        @type contacted: C{boolean}
+        @param contacted: whether the new node is known to be good, i.e.
+            responded to a request (optional, defaults to True)
+        @rtype: L{node.Node}
+        @return: None if successful (the bucket wasn't full), otherwise returns the oldest node in the bucket
+        """
+        assert node.id != NULL_ID
+        if node.id == self.node.id: return
+
+        # Get the bucket for this node
+        i = self._bucketIndexForInt(node.num)
+
+        # Check to see if node is in the bucket already
+        try:
+            it = self.buckets[i].l.index(node.num)
+        except ValueError:
+            pass
+        else:
+            # The node is already in the bucket
+            if contacted:
+                # It responded, so update it
+                node.updateLastSeen()
+                # Move the existing node to the end of the bucket,
+                # marking it as the most recently seen contact
+                xnode = self.buckets[i].l[it]
+                del(self.buckets[i].l[it])
+                self.buckets[i].l.append(xnode)
+                self.buckets[i].touch()
+            return
+
+        # We don't have this node, check to see if the bucket is full
+        if len(self.buckets[i].l) < self.config['K']:
+            # Not full, append this node and return
+            if contacted:
+                node.updateLastSeen()
+            self.buckets[i].l.append(node)
+            self.buckets[i].touch()
+            return
+
+        # Bucket is full, check to see if the local node is not in the bucket
+        if not (self.buckets[i].min <= self.node < self.buckets[i].max):
+            # Local node not in the bucket, can't split it, return the oldest node
+            return self.buckets[i].l[0]
+
+        # Make sure our table isn't FULL, this is really unlikely
+        if len(self.buckets) >= self.config['HASH_LENGTH']:
+            log.err("Hash Table is FULL! Increase K!")
+            return
+
+        # This bucket is full and contains our node, split the bucket
+        self._splitBucket(self.buckets[i])
+
+        # Now that the bucket is split and balanced, try to insert the node again
+        return self.insertNode(node)
+
+    def justSeenNode(self, id):
+        """Mark a node as just having been seen.
+
+        Call this any time you get a message from a node, it will update it
+        in the table if it's there.
+
+        @type id: C{string}, C{int}, or L{node.Node}
+        @param id: the node ID to mark as just having been seen
+        @rtype: C{datetime.datetime}
+        @return: the old lastSeen time of the node, or None if it's not in the table
+        """
+        try:
+            n = self.findNodes(id)[0]
+        except IndexError:
+            return None
+        else:
+            tstamp = n.lastSeen
+            n.updateLastSeen()
+            return tstamp
+
+    def invalidateNode(self, n):
+        """Remove the node from the routing table.
+
+        Forget about node n. Use this when you know that a node is invalid.
+        """
+        self.replaceStaleNode(n)
+
+    def nodeFailed(self, node):
+        """Mark a node as having failed once, and remove it if it has failed too much."""
+        try:
+            n = self.findNodes(node.num)[0]
+        except IndexError:
+            return None
+        else:
+            if n.msgFailed() >= self.config['MAX_FAILURES']:
+                self.invalidateNode(n)

class KBucket:
+    """Single bucket of nodes in a kademlia-like routing table.
+
+    @type l: C{list} of L{node.Node}
+    @ivar l: the nodes that are in this bucket
+    @type min: C{long}
+    @ivar min: the minimum node ID that can be in this bucket
+    @type max: C{long}
+    @ivar max: the maximum node ID that can be in this bucket
+    @type lastAccessed: C{datetime.datetime}
+    @ivar lastAccessed: the last time a node in this bucket was successfully contacted
+    """
+
+    def __init__(self, contents, min, max):
+        """Initialize the bucket with nodes.
+
+        @type contents: C{list} of L{node.Node}
+        @param contents: the nodes to store in the bucket
+        @type min: C{long}
+        @param min: the minimum node ID that can be in this bucket
+        @type max: C{long}
+        @param max: the maximum node ID that can be in this bucket
+        """
+        self.l = contents
+        self.min = min
+        self.max = max
+        self.lastAccessed = datetime.now()
+
+    def touch(self):
+        """Update the L{lastAccessed} time."""
+        self.lastAccessed = datetime.now()
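The rich comparators defined on KBucket just below are what let _bucketIndexForInt call bisect_left directly on the bucket list: a bucket compares against a plain ID number as if it were its covered [min, max) range. A small sketch (illustrative, toy 4-bit space):

    from bisect import bisect_left
    buckets = [KBucket([], 0L, 8L), KBucket([], 8L, 16L)]
    # KBucket.__lt__(num) returns max <= num, so bisect_left lands on
    # the bucket whose range contains the number
    assert bisect_left(buckets, 5L) == 0
    assert bisect_left(buckets, 12L) == 1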
+
+        @type num: C{long}
+        @param num: the node ID to look for
+        @raise ValueError: if the node ID is not in the bucket
+        @rtype: L{node.Node}
+        @return: the node
+        """
+        if num in self.l: return num
+        else: raise ValueError
+
+    def __repr__(self):
+        return "<bucket %d items: %d to %d>" % (len(self.l), self.min, self.max)
+
+    #{ Comparators to bisect/index a list of buckets (by their range) with either a node or a long
+    def __lt__(self, a):
+        if isinstance(a, Node): a = a.num
+        return self.max <= a
+    def __le__(self, a):
+        if isinstance(a, Node): a = a.num
+        return self.min < a
+    def __gt__(self, a):
+        if isinstance(a, Node): a = a.num
+        return self.min > a
+    def __ge__(self, a):
+        if isinstance(a, Node): a = a.num
+        return self.max >= a
+    def __eq__(self, a):
+        if isinstance(a, Node): a = a.num
+        return self.min <= a and self.max > a
+    def __ne__(self, a):
+        if isinstance(a, Node): a = a.num
+        return self.min >= a or self.max < a

class TestKTable(unittest.TestCase):
+    """Unit tests for the routing table."""
+
+    def setUp(self):
+        self.a = Node(khash.newID(), '127.0.0.1', 2002)
+        self.t = KTable(self.a, {'HASH_LENGTH': 160, 'K': 8, 'MAX_FAILURES': 3})
+
+    def testAddNode(self):
+        self.b = Node(khash.newID(), '127.0.0.1', 2003)
+        self.t.insertNode(self.b)
+        self.failUnlessEqual(len(self.t.buckets[0].l), 1)
+        self.failUnlessEqual(self.t.buckets[0].l[0], self.b)
+
+    def testRemove(self):
+        self.testAddNode()
+        self.t.invalidateNode(self.b)
+        self.failUnlessEqual(len(self.t.buckets[0].l), 0)
+
+    def testFail(self):
+        self.testAddNode()
+        for i in range(self.t.config['MAX_FAILURES'] - 1):
+            self.t.nodeFailed(self.b)
+            self.failUnlessEqual(len(self.t.buckets[0].l), 1)
+            self.failUnlessEqual(self.t.buckets[0].l[0], self.b)
+
+        self.t.nodeFailed(self.b)
+        self.failUnlessEqual(len(self.t.buckets[0].l), 0)
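The eviction policy testFail exercises above: a node must fail MAX_FAILURES times in a row before nodeFailed drops it, and any successful contact resets the count. In miniature, using the Node class from node.py below (illustrative):

    n = Node(khash.newID(), '127.0.0.1', 2002)
    n.msgFailed()
    n.msgFailed()          # fails == 2, still below MAX_FAILURES of 3
    n.updateLastSeen()     # a pong resets the failure count
    assert n.fails == 0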
diff --git a/apt_p2p_Khashmir/node.py b/apt_p2p_Khashmir/node.py
new file mode 100644
index 0000000..49b8fe7
--- /dev/null
+++ b/apt_p2p_Khashmir/node.py
@@ -0,0 +1,143 @@
+## Copyright 2002-2003 Andrew Loewenstern, All Rights Reserved
+# see LICENSE.txt for license information
+
+"""Represents a node in the DHT.
+
+@type NULL_ID: C{string}
+@var NULL_ID: the node ID to use until one is known
+"""
+
+from datetime import datetime, MINYEAR
+from types import InstanceType
+
+from twisted.trial import unittest
+
+import khash
+from util import compact
+
+# magic id to use before we know a peer's id
+NULL_ID = 20 * '\0'
+
+class Node:
+    """Encapsulate a node's contact info.
+
+    @ivar conn: the connection to the remote node (added externally)
+    @ivar table: the routing table (added externally)
+    @type fails: C{int}
+    @ivar fails: number of times this node has failed in a row
+    @type lastSeen: C{datetime.datetime}
+    @ivar lastSeen: the last time a response was received from this node
+    @type id: C{string}
+    @ivar id: the node's ID in the DHT
+    @type num: C{long}
+    @ivar num: the node's ID in number form
+    @type host: C{string}
+    @ivar host: the IP address of the node
+    @type port: C{int}
+    @ivar port: the port of the node
+    @type token: C{string}
+    @ivar token: the last received token from the node
+    @type num_values: C{int}
+    @ivar num_values: the number of values the node has for the key in the
+        currently executing action
+    """
+
+    def __init__(self, id, host = None, port = None):
+        """Initialize the node.
+
+        @type id: C{string} or C{dictionary}
+        @param id: the node's ID in the DHT, or a dictionary containing the
+            node's id, host and port
+        @type host: C{string}
+        @param host: the IP address of the node
+            (optional, but must be specified if id is not a dictionary)
+        @type port: C{int}
+        @param port: the port of the node
+            (optional, but must be specified if id is not a dictionary)
+        """
+        self.fails = 0
+        self.lastSeen = datetime(MINYEAR, 1, 1)
+
+        # Alternate method, init Node from dictionary
+        if isinstance(id, dict):
+            host = id['host']
+            port = id['port']
+            id = id['id']
+
+        assert isinstance(id, str)
+        assert isinstance(host, str)
+        self.id = id
+        self.num = khash.intify(id)
+        self.host = host
+        self.port = int(port)
+        self.token = ''
+        self.num_values = 0
+        self._contactInfo = None
+
+    def updateLastSeen(self):
+        """Update the last contact time of the node and reset the number of failures."""
+        self.lastSeen = datetime.now()
+        self.fails = 0
+
+    def updateToken(self, token):
+        """Update the token for the node."""
+        self.token = token
+
+    def updateNumValues(self, num_values):
+        """Update how many values the node has in the current search for a value."""
+        self.num_values = num_values
+
+    def msgFailed(self):
+        """Log a failed attempt to contact this node.
+
+        @rtype: C{int}
+        @return: the number of consecutive failures this node has
+        """
+        self.fails = self.fails + 1
+        return self.fails
+
+    def contactInfo(self):
+        """Get the compact contact info for the node."""
+        if self._contactInfo is None:
+            self._contactInfo = compact(self.id, self.host, self.port)
+        return self._contactInfo
+
+    def __repr__(self):
+        return `(self.id, self.host, self.port)`
+
+    #{ Comparators to bisect/index a list of nodes with either a node or a long
+    def __lt__(self, a):
+        if type(a) == InstanceType:
+            a = a.num
+        return self.num < a
+    def __le__(self, a):
+        if type(a) == InstanceType:
+            a = a.num
+        return self.num <= a
+    def __gt__(self, a):
+        if type(a) == InstanceType:
+            a = a.num
+        return self.num > a
+    def __ge__(self, a):
+        if type(a) == InstanceType:
+            a = a.num
+        return self.num >= a
+    def __eq__(self, a):
+        if type(a) == InstanceType:
+            a = a.num
+        return self.num == a
+    def __ne__(self, a):
+        if type(a) == InstanceType:
+            a = a.num
+        return self.num != a


class TestNode(unittest.TestCase):
+    """Unit tests for the node implementation."""
+    def setUp(self):
+        self.node = Node(khash.newID(), '127.0.0.1', 2002)
+    def testUpdateLastSeen(self):
+        t = self.node.lastSeen
+        self.node.updateLastSeen()
+        self.failUnless(t < self.node.lastSeen)
+
\ No newline at end of file
diff --git a/apt_p2p_Khashmir/util.py b/apt_p2p_Khashmir/util.py
new file mode 100644
index 0000000..52b6e97
--- /dev/null
+++ b/apt_p2p_Khashmir/util.py
@@ -0,0 +1,78 @@
+## Copyright 2002-2003 Andrew Loewenstern, All Rights Reserved
+# see LICENSE.txt for license information
+
+"""Some utility functions for use in apt-p2p's khashmir DHT."""
+
+from twisted.trial import unittest
+
+def bucket_stats(l):
+    """Given a list of khashmir instances, finds min, max, and average number of nodes in tables."""
+    max = avg = 0
+    min = None
+    def count(buckets):
+        c = 0
+        for bucket in buckets:
+            c = c + len(bucket.l)
+        return c
+    for node in l:
+        c = count(node.table.buckets)
+        if min == None:
+            min = c
+        elif c < min:
+            min = c
+        if c > max:
+            max = c
+        avg = avg + c
+    avg = avg / len(l)
+    return {'min':min, 'max':max, 'avg':avg}
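The uncompact and compact functions below pack a contact into 26 bytes: a 20-byte node ID, 4 bytes of IPv4 address, and a 2-byte big-endian port. A worked byte-level sketch (illustrative values):

    s = 'N' * 20 + '\x7f\x00\x00\x01' + '\x04\xd2'
    # 20-byte ID of 'N's, IP 127.0.0.1, port 0x04d2 == 1234
    d = uncompact(s)
    assert d == {'id': 'N' * 20, 'host': '127.0.0.1', 'port': 1234}
    assert compact(d['id'], d['host'], d['port']) == s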
def uncompact(s):
+    """Extract the contact info from a compact node representation.
+
+    @type s: C{string}
+    @param s: the compact representation
+    @rtype: C{dictionary}
+    @return: the node ID, IP address and port to contact the node on
+    @raise ValueError: if the string is not a valid compact representation
+    """
+    if (len(s) != 26):
+        raise ValueError
+    id = s[:20]
+    host = '.'.join([str(ord(i)) for i in s[20:24]])
+    port = (ord(s[24]) << 8) | ord(s[25])
+    return {'id': id, 'host': host, 'port': port}

def compact(id, host, port):
+    """Create a compact representation of node contact info.
+
+    @type id: C{string}
+    @param id: the node ID
+    @type host: C{string}
+    @param host: the IP address of the node
+    @type port: C{int}
+    @param port: the port number to contact the node on
+    @rtype: C{string}
+    @return: the compact representation
+    @raise ValueError: if the info doesn't fit in a valid compact representation
+    """
+    s = id + ''.join([chr(int(i)) for i in host.split('.')]) + \
+        chr((port & 0xFF00) >> 8) + chr(port & 0xFF)
+    if len(s) != 26:
+        raise ValueError
+    return s

class TestUtil(unittest.TestCase):
+    """Tests for the utilities."""
+
+    timeout = 5
+    myid = '\xca\xec\xb8\x0c\x00\xe7\x07\xf8~])\x8f\x9d\xe5_B\xff\x1a\xc4!'
+    host = '165.234.1.34'
+    port = 61234
+
+    def test_compact(self):
+        d = uncompact(compact(self.myid, self.host, self.port))
+        self.failUnlessEqual(d['id'], self.myid)
+        self.failUnlessEqual(d['host'], self.host)
+        self.failUnlessEqual(d['port'], self.port)
+
\ No newline at end of file
diff --git a/debian/apt-dht.conf.sgml b/debian/apt-dht.conf.sgml
deleted file mode 100644
index 301ccb1..0000000
--- a/debian/apt-dht.conf.sgml
+++ /dev/null
@@ -1,303 +0,0 @@
- [DocBook SGML prologue: entity declarations for the author (Cameron Dale,
-  camrdale@gmail.com), the date (February 17, 2008), the manual section (5),
-  the package name (apt-p2p.conf), and the Debian/GNU/GPL names.]

- &dhemail; -
- - &dhfirstname; - &dhsurname; - - - 2008 - &dhusername; - - &dhdate; - - - &dhucpackage; - - &dhsection; - - - - &dhpackage; - - configuration file for &dhpackage; - - - - DESCRIPTION - - Configuration information for &dhpackage; is searched for in the following order, with later - entries overriding former ones: - - /etc/apt-p2p/apt-p2p.conf - ${HOME}/.apt-p2p/apt-p2p.conf - the location specified by the config-file parameter - - - - - - FORMAT - - &dhpackage; has a structure similar to Microsoft Windows INI files. - The configuration file consists of sections, led by a ``[section]'' header and followed - by ``name = value'' or ``name: value'' entries, with continuations in the style of RFC 822 - (values can span multiple lines by starting the subsequent lines with one or more spaces). - Some values indicate times, in which case a suffix of 'd' for - days, 'h' for hours, 'm' for minutes, and 's' for seconds can be used. - Boolean values can be '1', 'yes', 'true', and 'on' to evaluate to true, - or '0', 'no', 'false', and 'off' to evaluate to false. - Note that leading whitespace is removed from values, and case is not important. - Lines beginning with "#" or ";" are ignored and may be used to provide comments. - - - - VARIABLES - There are two required sections in the config file. The first is the DEFAULT section, providing - variables for the configuration of the main application. The second is the section that provides - variables for the configuration of the DHT. - - - DEFAULT - - - - - - The number of the port to listen on for requests. - The main application will use this TCP port to listen for requests from APT, and - for uploads to other peers. If a port is not specified for the DHT, it will also - use this UDP port to listen for DHT requests. - (Default is 9977.) - - - - - - The directory to store the downloaded files in. - (Default is ${HOME}/.apt-p2p/cache.) - - - - - - The list of directories containing packages to share with others. - All files in these directories will be hashed and available for everybody to download. - (Default is to share only the files downloaded.) - - - - - - Whether it's OK to use an IP address from a known local or private range. - (Default is false.) - - - - - - The time of inactivity to wait before unloading the - packages cache. The packages cache uses a lot of memory, and only takes a few seconds - to reload when a new request arrives. (Default is 5 minutes.) - - - - - - The time after which to refresh DHT keys. - This should be a time slightly less than the DHT's KEY_EXPIRE value. - (Default is 57 minutes.) - - - - - - The DHT implementation to use. It must be possible to do (in Python) - ``from <DHT>.DHT import DHT'' to get a class that implements the IDHT interface. - There should also be a similarly named section below to specify the options for the DHT. - (Default is `apt_p2p_Khashmir'.) - - - - - - Whether to only run the DHT. This can be useful for providing only a bootstrap node. - (Default is false.) - - - - - - apt_p2p_Khashmir - - - - - - The number of the port to listen on for DHT (UDP) requests. - (Default is to use the value specified in the DEFAULT section.) - - - - - - The list of bootstrap nodes to contact to join the DHT. - Each node should be on a separate line, and start with the IP address or host name, - followed by a colon and the port number. - (Default is a list of known good nodes.) - - - - - - Whether this node is a bootstrap node. - (Default is false.) - - - - - - The number of the Kademlia "K" constant. 
- It should be an even number. - (Default is 8.) - - - - - - The number of bits in the hash to use. - (Default is 160.) - - - - - - The time to wait between saves of the running state. - (Default is 5 minutes.) - - - - - - The number of concurrent calls per find node/value request. - (Default is 4.) - - - - - - The number of redundant copies of a value to store in the DHT. - (Default is 3.) - - - - - - The number of values to attempt to retrieve from the DHT. - Setting this to 0 will try to get all values (which could take a while if - a lot of nodes have values). Setting it negative will try to get that - number of results from only the closest STORE_REDUNDANCY nodes to the hash. - (Default is -10000, which is a large negative number so all values from the closest - STORE_REDUNDANCY nodes will be retrieved.) - - - - - - The number of times in a row a node can fail to - respond before it's booted from the routing table. - (Default is 3.) - - - - - - The minimum time to wait before re-pinging a node. - (Default is 15 minutes.) - - - - - - The maximum time to wait before refreshing a bucket. - (Default is 1 hour.) - - - - - - The time to wait before expiring unrefreshed keys. - (Default is 1 hour.) - - - - - - Whether to log lots of info about the requests and responses in the protocol. - (Default is false.) - - - - - - - - SEE ALSO - - - apt-p2p8 - - - - AUTHOR - This manual page was written by &dhusername; <&dhemail;> for - the &debian; system (but may be used by others). Permission is - granted to copy, distribute and/or modify this document under - the terms of the &gnu; General Public License, Version 2 or any - later version published by the Free Software Foundation. - - - On Debian systems, the complete text of the GNU General Public - License can be found in /usr/share/common-licenses/GPL. - - - - - diff --git a/debian/apt-dht.sgml b/debian/apt-dht.sgml deleted file mode 100644 index aa106a8..0000000 --- a/debian/apt-dht.sgml +++ /dev/null @@ -1,163 +0,0 @@ - - Cameron"> - Dale"> - - February 17, 2008"> - - 8"> - camrdale@gmail.com"> - - apt-p2p"> - - - Debian"> - GNU"> - GPL"> -]> - - - -
- &dhemail; -
- - &dhfirstname; - &dhsurname; - - - 2008 - &dhusername; - - &dhdate; -
- - &dhucpackage; - - &dhsection; - - - - &dhpackage; - - apt helper for peer-to-peer downloads of Debian packages - - - Normally &dhpackage; is run from init.d using twistd, in which case no &dhpackage; - options can be specified on the command-line, and all configuration variables are - read from the default config file locations of ${HOME}/.apt-p2p/apt-p2p.conf and /etc/apt-p2p/apt-p2p.conf. - The command is then: - - twistd -y /path/to/&dhpackage; - - For a detailed listing of the twistd options, see - twistd1 - &dhpackage; can also be run without twistd by executing: - - &dhpackage; - - In this case, you can specify the options documented below on the command-line. - - - DESCRIPTION - - This manual page documents briefly the options available to the &dhpackage; command. - - &dhpackage; is a helper for downloading Debian package files with APT. - It will download any needed files from other Apt-P2P peers in a - BitTorrent-like manner, and so reduce the strain on the Debian mirrors. - - In order for APT to send its requests to &dhpackage;, the sources.list entries must be modified to point to the - local &dhpackage; address. Unless you have changed the default port, adding "localhost:9977/" to the beginning - of each entry should be sufficient. For example, if your sources.list contains a line like this: - - deb http://ftp.us.debian.org/debian etch main contrib non-free - - then replace it with this: - - deb http://localhost:9977/ftp.us.debian.org/debian etch main contrib non-free - - The port can be changed in the &dhpackage; config file in /etc/&dhpackage;/&dhpackage;.conf, - but it defaults to 9977. - - - OPTIONS - - This program follows the usual &gnu; command line syntax, - with short options starting with a single dash (`-'), - and long options starting with two dashes (`--'). - A summary of options is included below. - - - - - - the filename to use for the configuration file; options found in this - file are combined with those in ${HOME}/.apt-p2p/apt-p2p.conf and /etc/apt-p2p/apt-p2p.conf - (see apt-p2p.conf5 for the format of the file) - - - - - - the filename to print log messages to, - or `-' to log to standard output; if not specified, - /var/log/apt-p2p.log will be used - - - - , - - print a help message describing the invocation of the program - - - - , - - print the version of the program - - - - - - - SEE ALSO - - - apt-p2p.conf5, - twistd1 - - - - AUTHOR - This manual page was written by &dhusername; <&dhemail;> for - the &debian; system (but may be used by others). Permission is - granted to copy, distribute and/or modify this document under - the terms of the &gnu; General Public License, Version 2 or any - later version published by the Free Software Foundation. - - - On Debian systems, the complete text of the GNU General Public - License can be found in /usr/share/common-licenses/GPL. - -
- - diff --git a/debian/apt-p2p.conf.sgml b/debian/apt-p2p.conf.sgml new file mode 100644 index 0000000..301ccb1 --- /dev/null +++ b/debian/apt-p2p.conf.sgml @@ -0,0 +1,303 @@ + + Cameron"> + Dale"> + + February 17, 2008"> + + 5"> + camrdale@gmail.com"> + + apt-p2p.conf"> + + + Debian"> + GNU"> + GPL"> +]> + + + +
+ &dhemail; +
+ + &dhfirstname; + &dhsurname; + + + 2008 + &dhusername; + + &dhdate; +
+ + &dhucpackage; + + &dhsection; + + + + &dhpackage; + + configuration file for &dhpackage; + + + + DESCRIPTION + + Configuration information for &dhpackage; is searched for in the following order, with later + entries overriding former ones: + + /etc/apt-p2p/apt-p2p.conf + ${HOME}/.apt-p2p/apt-p2p.conf + the location specified by the config-file parameter + + + + + + FORMAT + + &dhpackage; has a structure similar to Microsoft Windows INI files. + The configuration file consists of sections, led by a ``[section]'' header and followed + by ``name = value'' or ``name: value'' entries, with continuations in the style of RFC 822 + (values can span multiple lines by starting the subsequent lines with one or more spaces). + Some values indicate times, in which case a suffix of 'd' for + days, 'h' for hours, 'm' for minutes, and 's' for seconds can be used. + Boolean values can be '1', 'yes', 'true', and 'on' to evaluate to true, + or '0', 'no', 'false', and 'off' to evaluate to false. + Note that leading whitespace is removed from values, and case is not important. + Lines beginning with "#" or ";" are ignored and may be used to provide comments. + + + + VARIABLES + There are two required sections in the config file. The first is the DEFAULT section, providing + variables for the configuration of the main application. The second is the section that provides + variables for the configuration of the DHT. + + + DEFAULT + + + + + + The number of the port to listen on for requests. + The main application will use this TCP port to listen for requests from APT, and + for uploads to other peers. If a port is not specified for the DHT, it will also + use this UDP port to listen for DHT requests. + (Default is 9977.) + + + + + + The directory to store the downloaded files in. + (Default is ${HOME}/.apt-p2p/cache.) + + + + + + The list of directories containing packages to share with others. + All files in these directories will be hashed and available for everybody to download. + (Default is to share only the files downloaded.) + + + + + + Whether it's OK to use an IP address from a known local or private range. + (Default is false.) + + + + + + The time of inactivity to wait before unloading the + packages cache. The packages cache uses a lot of memory, and only takes a few seconds + to reload when a new request arrives. (Default is 5 minutes.) + + + + + + The time after which to refresh DHT keys. + This should be a time slightly less than the DHT's KEY_EXPIRE value. + (Default is 57 minutes.) + + + + + + The DHT implementation to use. It must be possible to do (in Python) + ``from <DHT>.DHT import DHT'' to get a class that implements the IDHT interface. + There should also be a similarly named section below to specify the options for the DHT. + (Default is `apt_p2p_Khashmir'.) + + + + + + Whether to only run the DHT. This can be useful for providing only a bootstrap node. + (Default is false.) + + + + + + apt_p2p_Khashmir + + + + + + The number of the port to listen on for DHT (UDP) requests. + (Default is to use the value specified in the DEFAULT section.) + + + + + + The list of bootstrap nodes to contact to join the DHT. + Each node should be on a separate line, and start with the IP address or host name, + followed by a colon and the port number. + (Default is a list of known good nodes.) + + + + + + Whether this node is a bootstrap node. + (Default is false.) + + + + + + The number of the Kademlia "K" constant. + It should be an even number. + (Default is 8.) 
+ + + + + + The number of bits in the hash to use. + (Default is 160.) + + + + + + The time to wait between saves of the running state. + (Default is 5 minutes.) + + + + + + The number of concurrent calls per find node/value request. + (Default is 4.) + + + + + + The number of redundant copies of a value to store in the DHT. + (Default is 3.) + + + + + + The number of values to attempt to retrieve from the DHT. + Setting this to 0 will try to get all values (which could take a while if + a lot of nodes have values). Setting it negative will try to get that + number of results from only the closest STORE_REDUNDANCY nodes to the hash. + (Default is -10000, which is a large negative number so all values from the closest + STORE_REDUNDANCY nodes will be retrieved.) + + + + + + The number of times in a row a node can fail to + respond before it's booted from the routing table. + (Default is 3.) + + + + + + The minimum time to wait before re-pinging a node. + (Default is 15 minutes.) + + + + + + The maximum time to wait before refreshing a bucket. + (Default is 1 hour.) + + + + + + The time to wait before expiring unrefreshed keys. + (Default is 1 hour.) + + + + + + Whether to log lots of info about the requests and responses in the protocol. + (Default is false.) + + + + + + + + SEE ALSO + + + apt-p2p8 + + + + AUTHOR + This manual page was written by &dhusername; <&dhemail;> for + the &debian; system (but may be used by others). Permission is + granted to copy, distribute and/or modify this document under + the terms of the &gnu; General Public License, Version 2 or any + later version published by the Free Software Foundation. + + + On Debian systems, the complete text of the GNU General Public + License can be found in /usr/share/common-licenses/GPL. + +
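[Editor's note: the FORMAT section above describes standard INI parsing with RFC 822-style continuations; the time suffixes are apt-p2p's own convention, applied after parsing. The following snippet is not part of the commit; it is a sketch, in the project's Python 2 style, of how such values behave, using the documented BOOTSTRAP and CHECKPOINT_INTERVAL defaults as sample data.]

    from ConfigParser import SafeConfigParser
    from StringIO import StringIO

    # BOOTSTRAP spans two lines; the continuation line starts with a space.
    sample = """[apt_p2p_Khashmir]
    BOOTSTRAP = www.camrdale.org:9977
     steveholt.hopto.org:9976
    CHECKPOINT_INTERVAL = 5m
    """

    parser = SafeConfigParser()
    parser.readfp(StringIO(sample))
    print parser.get('apt_p2p_Khashmir', 'BOOTSTRAP').split()
    # ['www.camrdale.org:9977', 'steveholt.hopto.org:9976']

    # A sketch of converting a time value with a 'd'/'h'/'m'/'s' suffix:
    value = parser.get('apt_p2p_Khashmir', 'CHECKPOINT_INTERVAL')
    seconds = int(value[:-1]) * {'s': 1, 'm': 60, 'h': 3600, 'd': 86400}[value[-1]]
    print seconds  # 300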
+ + diff --git a/debian/apt-p2p.sgml b/debian/apt-p2p.sgml new file mode 100644 index 0000000..aa106a8 --- /dev/null +++ b/debian/apt-p2p.sgml @@ -0,0 +1,163 @@ + + Cameron"> + Dale"> + + February 17, 2008"> + + 8"> + camrdale@gmail.com"> + + apt-p2p"> + + + Debian"> + GNU"> + GPL"> +]> + + + +
+ &dhemail; +
+ + &dhfirstname; + &dhsurname; + + + 2008 + &dhusername; + + &dhdate; +
+ + &dhucpackage; + + &dhsection; + + + + &dhpackage; + + apt helper for peer-to-peer downloads of Debian packages + + + Normally &dhpackage; is run from init.d using twistd, in which case no &dhpackage; + options can be specified on the command-line, and all configuration variables are + read from the default config file locations of ${HOME}/.apt-p2p/apt-p2p.conf and /etc/apt-p2p/apt-p2p.conf. + The command is then: + + twistd -y /path/to/&dhpackage; + + For a detailed listing of the twistd options, see + twistd1 + &dhpackage; can also be run without twistd by executing: + + &dhpackage; + + In this case, you can specify the options documented below on the command-line. + + + DESCRIPTION + + This manual page documents briefly the options available to the &dhpackage; command. + + &dhpackage; is a helper for downloading Debian package files with APT. + It will download any needed files from other Apt-P2P peers in a + BitTorrent-like manner, and so reduce the strain on the Debian mirrors. + + In order for APT to send its requests to &dhpackage;, the sources.list entries must be modified to point to the + local &dhpackage; address. Unless you have changed the default port, adding "localhost:9977/" to the beginning + of each entry should be sufficient. For example, if your sources.list contains a line like this: + + deb http://ftp.us.debian.org/debian etch main contrib non-free + + then replace it with this: + + deb http://localhost:9977/ftp.us.debian.org/debian etch main contrib non-free + + The port can be changed in the &dhpackage; config file in /etc/&dhpackage;/&dhpackage;.conf, + but it defaults to 9977. + + + OPTIONS + + This program follows the usual &gnu; command line syntax, + with short options starting with a single dash (`-'), + and long options starting with two dashes (`--'). + A summary of options is included below. + + + + + + the filename to use for the configuration file; options found in this + file are combined with those in ${HOME}/.apt-p2p/apt-p2p.conf and /etc/apt-p2p/apt-p2p.conf + (see apt-p2p.conf5 for the format of the file) + + + + + + the filename to print log messages to, + or `-' to log to standard output; if not specified, + /var/log/apt-p2p.log will be used + + + + , + + print a help message describing the invocation of the program + + + + , + + print the version of the program + + + + + + + SEE ALSO + + + apt-p2p.conf5, + twistd1 + + + + AUTHOR + This manual page was written by &dhusername; <&dhemail;> for + the &debian; system (but may be used by others). Permission is + granted to copy, distribute and/or modify this document under + the terms of the &gnu; General Public License, Version 2 or any + later version published by the Free Software Foundation. + + + On Debian systems, the complete text of the GNU General Public + License can be found in /usr/share/common-licenses/GPL. + +
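[Editor's note: the sources.list rewrite described in the manpage above is purely mechanical: prefix each mirror entry with the local apt-p2p listener. The helper below is not part of the commit; it is a hypothetical sketch in the project's Python 2 style, assuming the default port of 9977 and plain "deb http://" entries.]

    def p2p_source_line(line, port=9977):
        # Turn 'deb http://HOST/...' into 'deb http://localhost:PORT/HOST/...'.
        prefix = 'deb http://'
        if line.startswith(prefix):
            return 'deb http://localhost:%d/%s' % (port, line[len(prefix):])
        return line

    print p2p_source_line('deb http://ftp.us.debian.org/debian etch main contrib non-free')
    # deb http://localhost:9977/ftp.us.debian.org/debian etch main contrib non-free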
+ + diff --git a/docs/motivation/apt-dht-motivation.kilepr b/docs/motivation/apt-dht-motivation.kilepr deleted file mode 100644 index e3bba67..0000000 --- a/docs/motivation/apt-dht-motivation.kilepr +++ /dev/null @@ -1,40 +0,0 @@ -[General] -img_extIsRegExp=false -img_extensions=.eps .pdf .dvi .ps .fig .gif .jpg .jpeg .png -kileprversion=1 -kileversion=1.9.2 -lastDocument=motivation.tex -masterDocument= -name=apt-p2p-motivation -pkg_extIsRegExp=false -pkg_extensions=.cls .sty .dtx -src_extIsRegExp=false -src_extensions=.tex .ltx .bib .mp - -[Tools] -MakeIndex= -QuickBuild=LaTeX+DVItoPDF+ViewPDF - -[item:all.bib] -archive=true -column=20 -encoding=UTF-8 -highlight=BibTeX -line=225 -open=true - -[item:apt-p2p-motivation.kilepr] -archive=true -column=0 -encoding= -highlight= -line=0 -open=false - -[item:motivation.tex] -archive=true -column=0 -encoding=UTF-8 -highlight=LaTeX -line=347 -open=true diff --git a/docs/motivation/apt-p2p-motivation.kilepr b/docs/motivation/apt-p2p-motivation.kilepr new file mode 100644 index 0000000..e3bba67 --- /dev/null +++ b/docs/motivation/apt-p2p-motivation.kilepr @@ -0,0 +1,40 @@ +[General] +img_extIsRegExp=false +img_extensions=.eps .pdf .dvi .ps .fig .gif .jpg .jpeg .png +kileprversion=1 +kileversion=1.9.2 +lastDocument=motivation.tex +masterDocument= +name=apt-p2p-motivation +pkg_extIsRegExp=false +pkg_extensions=.cls .sty .dtx +src_extIsRegExp=false +src_extensions=.tex .ltx .bib .mp + +[Tools] +MakeIndex= +QuickBuild=LaTeX+DVItoPDF+ViewPDF + +[item:all.bib] +archive=true +column=20 +encoding=UTF-8 +highlight=BibTeX +line=225 +open=true + +[item:apt-p2p-motivation.kilepr] +archive=true +column=0 +encoding= +highlight= +line=0 +open=false + +[item:motivation.tex] +archive=true +column=0 +encoding=UTF-8 +highlight=LaTeX +line=347 +open=true diff --git a/docs/motivation/apt_dht_simulation-size_CDF.eps b/docs/motivation/apt_dht_simulation-size_CDF.eps deleted file mode 100644 index 0ccb9ff..0000000 --- a/docs/motivation/apt_dht_simulation-size_CDF.eps +++ /dev/null @@ -1,698 +0,0 @@ -%!PS-Adobe-2.0 EPSF-1.2 -%%Creator: MATLAB, The Mathworks, Inc. Version 7.5.0.338 (R2007b). Operating System: Linux 2.6.18.8-0.7-default #1 SMP Tue Oct 2 17:21:08 UTC 2007 i686. 
-%%Title: /cs/grad1/camerond/school/matlab/cache/apt_dht_simulation-size_CDF.20080208T171700.eps -%%CreationDate: 02/08/2008 17:17:44 [... 698 lines of MATLAB-generated PostScript omitted: a CDF plot with a log-scale x-axis "Package Size (kB)" from 10^0 to 10^5, a y-axis "Cumulative Distribution" from 0 to 1, and two curves labeled "By Number" (solid) and "By Popularity" (dashed) ...] -%%EOF diff --git a/docs/motivation/apt_p2p_simulation-size_CDF.eps b/docs/motivation/apt_p2p_simulation-size_CDF.eps new file mode 100644 index 0000000..0ccb9ff --- /dev/null +++ b/docs/motivation/apt_p2p_simulation-size_CDF.eps @@ -0,0 +1,698 @@ +%!PS-Adobe-2.0 EPSF-1.2 +%%Creator: MATLAB, The Mathworks, Inc. Version 7.5.0.338 (R2007b). Operating System: Linux 2.6.18.8-0.7-default #1 SMP Tue Oct 2 17:21:08 UTC 2007 i686. 
+%%Title: /cs/grad1/camerond/school/matlab/cache/apt_dht_simulation-size_CDF.20080208T171700.eps +%%CreationDate: 02/08/2008 17:17:44 [... the same 698 lines of PostScript re-added unchanged under the new apt_p2p file name: the CDF plot of "Package Size (kB)" versus "Cumulative Distribution", with "By Number" and "By Popularity" curves ...] +%%EOF