From 1461780708c6ce0562ec0a27062b13b32d7dc348 Mon Sep 17 00:00:00 2001 From: Cameron Dale Date: Mon, 18 Feb 2008 15:50:48 -0800 Subject: [PATCH] Download from peers using the hash instead of a directory location. Also updated the HTTP downloading tests to use the rarely changing IETF RFCs instead of www.camrdale.org. --- TODO | 10 ------- apt_dht/HTTPDownloader.py | 58 +++++++++++++++++++-------------------- apt_dht/PeerManager.py | 49 ++++++++++++++++++--------------- apt_dht/apt_dht.py | 8 +++--- 4 files changed, 60 insertions(+), 65 deletions(-) diff --git a/TODO b/TODO index 7982141..5f9555b 100644 --- a/TODO +++ b/TODO @@ -22,16 +22,6 @@ distributions. They need to either be ignored, or dealt with properly by adding them to the tracking done by the AptPackages module. -Change file identifier from path to hash. - -Some files can change without changing the path, since the file was -added to the DHT by the peer. Examples are Release, Packages.gz, and -Sources.bz2. This would cause problems when requesting these files by -path. Instead, share the files by hash, then the request would be for -http://127.3.45.9:9977/~, and it would always work. This -will require a database lookup for every request. - - PeerManager needs to download large files from multiple peers. 
The PeerManager currently chooses a peer at random from the list of diff --git a/apt_dht/HTTPDownloader.py b/apt_dht/HTTPDownloader.py index c906c0e..e028a3b 100644 --- a/apt_dht/HTTPDownloader.py +++ b/apt_dht/HTTPDownloader.py @@ -146,25 +146,25 @@ class TestClientManager(unittest.TestCase): stream_mod.readStream(resp.stream, print_).addCallback(printdone) def test_download(self): - host = 'www.camrdale.org' + host = 'www.ietf.org' self.client = HTTPClientManager(host, 80) self.timeout = 10 - d = self.client.get('/robots.txt') - d.addCallback(self.gotResp, 1, 309) + d = self.client.get('/rfc/rfc0013.txt') + d.addCallback(self.gotResp, 1, 1070) return d def test_head(self): - host = 'www.camrdale.org' + host = 'www.ietf.org' self.client = HTTPClientManager(host, 80) self.timeout = 10 - d = self.client.get('/robots.txt', "HEAD") + d = self.client.get('/rfc/rfc0013.txt', "HEAD") d.addCallback(self.gotResp, 1, 0) return d def test_multiple_downloads(self): - host = 'www.camrdale.org' + host = 'www.ietf.org' self.client = HTTPClientManager(host, 80) self.timeout = 120 lastDefer = defer.Deferred() @@ -175,20 +175,20 @@ class TestClientManager(unittest.TestCase): if last: d.addBoth(lastDefer.callback) - newRequest("/", 1, 3433) - newRequest("/blog/", 2, 39152) - newRequest("/camrdale.html", 3, 2234) - self.pending_calls.append(reactor.callLater(1, newRequest, '/robots.txt', 4, 309)) - self.pending_calls.append(reactor.callLater(10, newRequest, '/wikilink.html', 5, 3084)) - self.pending_calls.append(reactor.callLater(30, newRequest, '/sitemap.html', 6, 4756)) - self.pending_calls.append(reactor.callLater(31, newRequest, '/PlanetLab.html', 7, 2783)) - self.pending_calls.append(reactor.callLater(32, newRequest, '/openid.html', 8, 2525)) - self.pending_calls.append(reactor.callLater(32, newRequest, '/subpage.html', 9, 2381)) - self.pending_calls.append(reactor.callLater(62, newRequest, '/sitemap2.rss', 0, 313470, True)) + newRequest("/rfc/rfc0006.txt", 1, 1776) + 
newRequest("/rfc/rfc2362.txt", 2, 159833) + newRequest("/rfc/rfc0801.txt", 3, 40824) + self.pending_calls.append(reactor.callLater(1, newRequest, '/rfc/rfc0013.txt', 4, 1070)) + self.pending_calls.append(reactor.callLater(10, newRequest, '/rfc/rfc0022.txt', 5, 4606)) + self.pending_calls.append(reactor.callLater(30, newRequest, '/rfc/rfc0048.txt', 6, 41696)) + self.pending_calls.append(reactor.callLater(31, newRequest, '/rfc/rfc3261.txt', 7, 647976)) + self.pending_calls.append(reactor.callLater(32, newRequest, '/rfc/rfc0014.txt', 8, 27)) + self.pending_calls.append(reactor.callLater(32, newRequest, '/rfc/rfc0001.txt', 9, 21088)) + self.pending_calls.append(reactor.callLater(62, newRequest, '/rfc/rfc2801.txt', 0, 598794, True)) return lastDefer def test_multiple_quick_downloads(self): - host = 'www.camrdale.org' + host = 'www.ietf.org' self.client = HTTPClientManager(host, 80) self.timeout = 30 lastDefer = defer.Deferred() @@ -199,24 +199,24 @@ class TestClientManager(unittest.TestCase): if last: d.addBoth(lastDefer.callback) - newRequest("/", 1, 3433) - newRequest("/blog/", 2, 39152) - newRequest("/camrdale.html", 3, 2234) - self.pending_calls.append(reactor.callLater(0, newRequest, '/robots.txt', 4, 309)) - self.pending_calls.append(reactor.callLater(0, newRequest, '/wikilink.html', 5, 3084)) - self.pending_calls.append(reactor.callLater(0, newRequest, '/sitemap.html', 6, 4756)) - self.pending_calls.append(reactor.callLater(0, newRequest, '/PlanetLab.html', 7, 2783)) - self.pending_calls.append(reactor.callLater(0, newRequest, '/openid.html', 8, 2525)) - self.pending_calls.append(reactor.callLater(0, newRequest, '/subpage.html', 9, 2381)) - self.pending_calls.append(reactor.callLater(0, newRequest, '/sitemap2.rss', 0, 313470, True)) + newRequest("/rfc/rfc0006.txt", 1, 1776) + newRequest("/rfc/rfc2362.txt", 2, 159833) + newRequest("/rfc/rfc0801.txt", 3, 40824) + self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0013.txt', 4, 1070)) + 
self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0022.txt', 5, 4606)) + self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0048.txt', 6, 41696)) + self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc3261.txt', 7, 647976)) + self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0014.txt', 8, 27)) + self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0001.txt', 9, 21088)) + self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc2801.txt', 0, 598794, True)) return lastDefer def test_range(self): - host = 'www.camrdale.org' + host = 'www.ietf.org' self.client = HTTPClientManager(host, 80) self.timeout = 10 - d = self.client.getRange('/robots.txt', 100, 199) + d = self.client.getRange('/rfc/rfc0013.txt', 100, 199) d.addCallback(self.gotResp, 1, 100) return d diff --git a/apt_dht/PeerManager.py b/apt_dht/PeerManager.py index 75c135d..8c193b7 100644 --- a/apt_dht/PeerManager.py +++ b/apt_dht/PeerManager.py @@ -14,18 +14,23 @@ class PeerManager: def __init__(self): self.clients = {} - def get(self, locations, method="GET", modtime=None): - """Download from a list of peers. + def get(self, hash, mirror, peers = [], method="GET", modtime=None): + """Download from a list of peers or fallback to a mirror. 
- @type locations: C{list} of C{string} - @var locations: a list of the locations where the file can be found + @type peers: C{list} of C{string} + @param peers: a list of the peers where the file can be found """ - url = choice(locations) - log.msg('Downloading %s' % url) - parsed = urlparse(url) - assert parsed[0] == "http", "Only HTTP is supported, not '%s'" % parsed[0] - host, port = splitHostPort(parsed[0], parsed[1]) - path = urlunparse(('', '') + parsed[2:]) + if peers: + peer = choice(peers) + log.msg('Downloading from peer %s' % peer) + host, port = splitHostPort('http', peer) + path = '/~/' + hash + else: + log.msg('Downloading (%s) from mirror %s' % (method, mirror)) + parsed = urlparse(mirror) + assert parsed[0] == "http", "Only HTTP is supported, not '%s'" % parsed[0] + host, port = splitHostPort(parsed[0], parsed[1]) + path = urlunparse(('', '') + parsed[2:]) return self.getPeer(host, port, path, method, modtime) @@ -62,17 +67,17 @@ class TestPeerManager(unittest.TestCase): self.manager = PeerManager() self.timeout = 10 - host = 'www.camrdale.org' - d = self.manager.get(['http://' + host + '/robots.txt']) - d.addCallback(self.gotResp, 1, 309) + host = 'www.ietf.org' + d = self.manager.get('', 'http://' + host + '/rfc/rfc0013.txt') + d.addCallback(self.gotResp, 1, 1070) return d def test_head(self): self.manager = PeerManager() self.timeout = 10 - host = 'www.camrdale.org' - d = self.manager.get(['http://' + host + '/robots.txt'], "HEAD") + host = 'www.ietf.org' + d = self.manager.get('', 'http://' + host + '/rfc/rfc0013.txt', method = "HEAD") d.addCallback(self.gotResp, 1, 0) return d @@ -82,20 +87,20 @@ class TestPeerManager(unittest.TestCase): lastDefer = defer.Deferred() def newRequest(host, path, num, expect, last=False): - d = self.manager.get(['http://' + host + ':' + str(80) + path]) + d = self.manager.get('', 'http://' + host + ':' + str(80) + path) d.addCallback(self.gotResp, num, expect) if last: d.addBoth(lastDefer.callback) - 
newRequest('www.camrdale.org', "/", 1, 3433) - newRequest('www.camrdale.org', "/blog/", 2, 39152) + newRequest('www.ietf.org', "/rfc/rfc0006.txt", 1, 1776) + newRequest('www.ietf.org', "/rfc/rfc2362.txt", 2, 159833) newRequest('www.google.ca', "/", 3, None) self.pending_calls.append(reactor.callLater(1, newRequest, 'www.sfu.ca', '/', 4, None)) - self.pending_calls.append(reactor.callLater(10, newRequest, 'www.camrdale.org', '/wikilink.html', 5, 3084)) - self.pending_calls.append(reactor.callLater(30, newRequest, 'www.camrdale.org', '/sitemap.html', 6, 4756)) + self.pending_calls.append(reactor.callLater(10, newRequest, 'www.ietf.org', '/rfc/rfc0048.txt', 5, 41696)) + self.pending_calls.append(reactor.callLater(30, newRequest, 'www.ietf.org', '/rfc/rfc0022.txt', 6, 4606)) self.pending_calls.append(reactor.callLater(31, newRequest, 'www.sfu.ca', '/studentcentral/index.html', 7, None)) - self.pending_calls.append(reactor.callLater(32, newRequest, 'www.camrdale.org', '/openid.html', 8, 2525)) - self.pending_calls.append(reactor.callLater(32, newRequest, 'www.camrdale.org', '/subpage.html', 9, 2381)) + self.pending_calls.append(reactor.callLater(32, newRequest, 'www.ietf.org', '/rfc/rfc0014.txt', 8, 27)) + self.pending_calls.append(reactor.callLater(32, newRequest, 'www.ietf.org', '/rfc/rfc0001.txt', 9, 21088)) self.pending_calls.append(reactor.callLater(62, newRequest, 'www.google.ca', '/intl/en/options/', 0, None, True)) return lastDefer diff --git a/apt_dht/apt_dht.py b/apt_dht/apt_dht.py index 264be0f..9d0241d 100644 --- a/apt_dht/apt_dht.py +++ b/apt_dht/apt_dht.py @@ -52,7 +52,7 @@ class AptDHT: def check_freshness(self, req, path, modtime, resp): log.msg('Checking if %s is still fresh' % path) - d = self.peers.get([path], "HEAD", modtime) + d = self.peers.get('', path, method = "HEAD", modtime = modtime) d.addCallback(self.check_freshness_done, req, path, resp) return d @@ -132,14 +132,14 @@ class AptDHT: def lookupHash_done(self, locations, hash, path, d): if 
not locations: log.msg('Peers for %s were not found' % path) - getDefer = self.peers.get([path]) + getDefer = self.peers.get(hash, path) getDefer.addCallback(self.cache.save_file, hash, path) getDefer.addErrback(self.cache.save_error, path) getDefer.addCallbacks(d.callback, d.errback) else: log.msg('Found peers for %s: %r' % (path, locations)) # Download from the found peers - getDefer = self.peers.get(locations) + getDefer = self.peers.get(hash, path, locations) getDefer.addCallback(self.check_response, hash, path) getDefer.addCallback(self.cache.save_file, hash, path) getDefer.addErrback(self.cache.save_error, path) @@ -148,7 +148,7 @@ class AptDHT: def check_response(self, response, hash, path): if response.code < 200 or response.code >= 300: log.msg('Download from peers failed, going to direct download: %s' % path) - getDefer = self.peers.get([path]) + getDefer = self.peers.get(hash, path) return getDefer return response -- 2.30.2