Download from peers using the hash instead of a directory location.
author: Cameron Dale <camrdale@gmail.com>
Mon, 18 Feb 2008 23:50:48 +0000 (15:50 -0800)
committer: Cameron Dale <camrdale@gmail.com>
Mon, 18 Feb 2008 23:50:48 +0000 (15:50 -0800)
Also updated the HTTP downloading tests to not use www.camrdale.org
but instead use the less frequently changing IETF RFCs.

TODO
apt_dht/HTTPDownloader.py
apt_dht/PeerManager.py
apt_dht/apt_dht.py

diff --git a/TODO b/TODO
index 798214179d83abee242a015c409e08c5e6da88e9..5f9555b75acc1b733bf3a03383e848e20135552e 100644 (file)
--- a/TODO
+++ b/TODO
@@ -22,16 +22,6 @@ distributions. They need to either be ignored, or dealt with properly by
 adding them to the tracking done by the AptPackages module.
 
 
 adding them to the tracking done by the AptPackages module.
 
 
-Change file identifier from path to hash.
-
-Some files can change without changing the path, since the file was 
-added to the DHT by the peer. Examples are Release, Packages.gz, and 
-Sources.bz2. This would cause problems when requesting these files by 
-path. Instead, share the files by hash, then the request would be for 
-http://127.3.45.9:9977/~<urlencodedHash>, and it would always work. This 
-will require a database lookup for every request.
-
-
 PeerManager needs to download large files from multiple peers.
 
 The PeerManager currently chooses a peer at random from the list of 
 PeerManager needs to download large files from multiple peers.
 
 The PeerManager currently chooses a peer at random from the list of 
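diff --git a/apt_dht/HTTPDownloader.py b/apt_dht/HTTPDownloader.py
index c906c0e87d133a52310dd91800449af90c95a2e4..e028a3b5cc4a6176027e3f2cdcb3181535ce8c07 100644 (file)
--- a/apt_dht/HTTPDownloader.py
+++ b/apt_dht/HTTPDownloader.py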
@@ -146,25 +146,25 @@ class TestClientManager(unittest.TestCase):
         stream_mod.readStream(resp.stream, print_).addCallback(printdone)
     
     def test_download(self):
         stream_mod.readStream(resp.stream, print_).addCallback(printdone)
     
     def test_download(self):
-        host = 'www.camrdale.org'
+        host = 'www.ietf.org'
         self.client = HTTPClientManager(host, 80)
         self.timeout = 10
         
         self.client = HTTPClientManager(host, 80)
         self.timeout = 10
         
-        d = self.client.get('/robots.txt')
-        d.addCallback(self.gotResp, 1, 309)
+        d = self.client.get('/rfc/rfc0013.txt')
+        d.addCallback(self.gotResp, 1, 1070)
         return d
         
     def test_head(self):
         return d
         
     def test_head(self):
-        host = 'www.camrdale.org'
+        host = 'www.ietf.org'
         self.client = HTTPClientManager(host, 80)
         self.timeout = 10
         
         self.client = HTTPClientManager(host, 80)
         self.timeout = 10
         
-        d = self.client.get('/robots.txt', "HEAD")
+        d = self.client.get('/rfc/rfc0013.txt', "HEAD")
         d.addCallback(self.gotResp, 1, 0)
         return d
         
     def test_multiple_downloads(self):
         d.addCallback(self.gotResp, 1, 0)
         return d
         
     def test_multiple_downloads(self):
-        host = 'www.camrdale.org'
+        host = 'www.ietf.org'
         self.client = HTTPClientManager(host, 80)
         self.timeout = 120
         lastDefer = defer.Deferred()
         self.client = HTTPClientManager(host, 80)
         self.timeout = 120
         lastDefer = defer.Deferred()
@@ -175,20 +175,20 @@ class TestClientManager(unittest.TestCase):
             if last:
                 d.addBoth(lastDefer.callback)
                 
             if last:
                 d.addBoth(lastDefer.callback)
                 
-        newRequest("/", 1, 3433)
-        newRequest("/blog/", 2, 39152)
-        newRequest("/camrdale.html", 3, 2234)
-        self.pending_calls.append(reactor.callLater(1, newRequest, '/robots.txt', 4, 309))
-        self.pending_calls.append(reactor.callLater(10, newRequest, '/wikilink.html', 5, 3084))
-        self.pending_calls.append(reactor.callLater(30, newRequest, '/sitemap.html', 6, 4756))
-        self.pending_calls.append(reactor.callLater(31, newRequest, '/PlanetLab.html', 7, 2783))
-        self.pending_calls.append(reactor.callLater(32, newRequest, '/openid.html', 8, 2525))
-        self.pending_calls.append(reactor.callLater(32, newRequest, '/subpage.html', 9, 2381))
-        self.pending_calls.append(reactor.callLater(62, newRequest, '/sitemap2.rss', 0, 313470, True))
+        newRequest("/rfc/rfc0006.txt", 1, 1776)
+        newRequest("/rfc/rfc2362.txt", 2, 159833)
+        newRequest("/rfc/rfc0801.txt", 3, 40824)
+        self.pending_calls.append(reactor.callLater(1, newRequest, '/rfc/rfc0013.txt', 4, 1070))
+        self.pending_calls.append(reactor.callLater(10, newRequest, '/rfc/rfc0022.txt', 5, 4606))
+        self.pending_calls.append(reactor.callLater(30, newRequest, '/rfc/rfc0048.txt', 6, 41696))
+        self.pending_calls.append(reactor.callLater(31, newRequest, '/rfc/rfc3261.txt', 7, 647976))
+        self.pending_calls.append(reactor.callLater(32, newRequest, '/rfc/rfc0014.txt', 8, 27))
+        self.pending_calls.append(reactor.callLater(32, newRequest, '/rfc/rfc0001.txt', 9, 21088))
+        self.pending_calls.append(reactor.callLater(62, newRequest, '/rfc/rfc2801.txt', 0, 598794, True))
         return lastDefer
         
     def test_multiple_quick_downloads(self):
         return lastDefer
         
     def test_multiple_quick_downloads(self):
-        host = 'www.camrdale.org'
+        host = 'www.ietf.org'
         self.client = HTTPClientManager(host, 80)
         self.timeout = 30
         lastDefer = defer.Deferred()
         self.client = HTTPClientManager(host, 80)
         self.timeout = 30
         lastDefer = defer.Deferred()
@@ -199,24 +199,24 @@ class TestClientManager(unittest.TestCase):
             if last:
                 d.addBoth(lastDefer.callback)
                 
             if last:
                 d.addBoth(lastDefer.callback)
                 
-        newRequest("/", 1, 3433)
-        newRequest("/blog/", 2, 39152)
-        newRequest("/camrdale.html", 3, 2234)
-        self.pending_calls.append(reactor.callLater(0, newRequest, '/robots.txt', 4, 309))
-        self.pending_calls.append(reactor.callLater(0, newRequest, '/wikilink.html', 5, 3084))
-        self.pending_calls.append(reactor.callLater(0, newRequest, '/sitemap.html', 6, 4756))
-        self.pending_calls.append(reactor.callLater(0, newRequest, '/PlanetLab.html', 7, 2783))
-        self.pending_calls.append(reactor.callLater(0, newRequest, '/openid.html', 8, 2525))
-        self.pending_calls.append(reactor.callLater(0, newRequest, '/subpage.html', 9, 2381))
-        self.pending_calls.append(reactor.callLater(0, newRequest, '/sitemap2.rss', 0, 313470, True))
+        newRequest("/rfc/rfc0006.txt", 1, 1776)
+        newRequest("/rfc/rfc2362.txt", 2, 159833)
+        newRequest("/rfc/rfc0801.txt", 3, 40824)
+        self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0013.txt', 4, 1070))
+        self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0022.txt', 5, 4606))
+        self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0048.txt', 6, 41696))
+        self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc3261.txt', 7, 647976))
+        self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0014.txt', 8, 27))
+        self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0001.txt', 9, 21088))
+        self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc2801.txt', 0, 598794, True))
         return lastDefer
         
     def test_range(self):
         return lastDefer
         
     def test_range(self):
-        host = 'www.camrdale.org'
+        host = 'www.ietf.org'
         self.client = HTTPClientManager(host, 80)
         self.timeout = 10
         
         self.client = HTTPClientManager(host, 80)
         self.timeout = 10
         
-        d = self.client.getRange('/robots.txt', 100, 199)
+        d = self.client.getRange('/rfc/rfc0013.txt', 100, 199)
         d.addCallback(self.gotResp, 1, 100)
         return d
         
         d.addCallback(self.gotResp, 1, 100)
         return d
         
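diff --git a/apt_dht/PeerManager.py b/apt_dht/PeerManager.py
index 75c135d2b946c0dca0c26daaf9d4a3e2a0088f61..8c193b7f453f9a79608f6647d6aa2214441918c1 100644 (file)
--- a/apt_dht/PeerManager.py
+++ b/apt_dht/PeerManager.py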
@@ -14,18 +14,23 @@ class PeerManager:
     def __init__(self):
         self.clients = {}
         
     def __init__(self):
         self.clients = {}
         
-    def get(self, locations, method="GET", modtime=None):
-        """Download from a list of peers.
+    def get(self, hash, mirror, peers = [], method="GET", modtime=None):
+        """Download from a list of peers or fallback to a mirror.
         
         
-        @type locations: C{list} of C{string}
-        @var locations: a list of the locations where the file can be found
+        @type peers: C{list} of C{string}
+        @param peers: a list of the peers where the file can be found
         """
         """
-        url = choice(locations)
-        log.msg('Downloading %s' % url)
-        parsed = urlparse(url)
-        assert parsed[0] == "http", "Only HTTP is supported, not '%s'" % parsed[0]
-        host, port = splitHostPort(parsed[0], parsed[1])
-        path = urlunparse(('', '') + parsed[2:])
+        if peers:
+            peer = choice(peers)
+            log.msg('Downloading from peer %s' % peer)
+            host, port = splitHostPort('http', peer)
+            path = '/~/' + hash
+        else:
+            log.msg('Downloading (%s) from mirror %s' % (method, mirror))
+            parsed = urlparse(mirror)
+            assert parsed[0] == "http", "Only HTTP is supported, not '%s'" % parsed[0]
+            host, port = splitHostPort(parsed[0], parsed[1])
+            path = urlunparse(('', '') + parsed[2:])
 
         return self.getPeer(host, port, path, method, modtime)
         
 
         return self.getPeer(host, port, path, method, modtime)
         
@@ -62,17 +67,17 @@ class TestPeerManager(unittest.TestCase):
         self.manager = PeerManager()
         self.timeout = 10
         
         self.manager = PeerManager()
         self.timeout = 10
         
-        host = 'www.camrdale.org'
-        d = self.manager.get(['http://' + host + '/robots.txt'])
-        d.addCallback(self.gotResp, 1, 309)
+        host = 'www.ietf.org'
+        d = self.manager.get('', 'http://' + host + '/rfc/rfc0013.txt')
+        d.addCallback(self.gotResp, 1, 1070)
         return d
         
     def test_head(self):
         self.manager = PeerManager()
         self.timeout = 10
         
         return d
         
     def test_head(self):
         self.manager = PeerManager()
         self.timeout = 10
         
-        host = 'www.camrdale.org'
-        d = self.manager.get(['http://' + host + '/robots.txt'], "HEAD")
+        host = 'www.ietf.org'
+        d = self.manager.get('', 'http://' + host + '/rfc/rfc0013.txt', method = "HEAD")
         d.addCallback(self.gotResp, 1, 0)
         return d
         
         d.addCallback(self.gotResp, 1, 0)
         return d
         
@@ -82,20 +87,20 @@ class TestPeerManager(unittest.TestCase):
         lastDefer = defer.Deferred()
         
         def newRequest(host, path, num, expect, last=False):
         lastDefer = defer.Deferred()
         
         def newRequest(host, path, num, expect, last=False):
-            d = self.manager.get(['http://' + host + ':' + str(80) + path])
+            d = self.manager.get('', 'http://' + host + ':' + str(80) + path)
             d.addCallback(self.gotResp, num, expect)
             if last:
                 d.addBoth(lastDefer.callback)
                 
             d.addCallback(self.gotResp, num, expect)
             if last:
                 d.addBoth(lastDefer.callback)
                 
-        newRequest('www.camrdale.org', "/", 1, 3433)
-        newRequest('www.camrdale.org', "/blog/", 2, 39152)
+        newRequest('www.ietf.org', "/rfc/rfc0006.txt", 1, 1776)
+        newRequest('www.ietf.org', "/rfc/rfc2362.txt", 2, 159833)
         newRequest('www.google.ca', "/", 3, None)
         self.pending_calls.append(reactor.callLater(1, newRequest, 'www.sfu.ca', '/', 4, None))
         newRequest('www.google.ca', "/", 3, None)
         self.pending_calls.append(reactor.callLater(1, newRequest, 'www.sfu.ca', '/', 4, None))
-        self.pending_calls.append(reactor.callLater(10, newRequest, 'www.camrdale.org', '/wikilink.html', 5, 3084))
-        self.pending_calls.append(reactor.callLater(30, newRequest, 'www.camrdale.org', '/sitemap.html', 6, 4756))
+        self.pending_calls.append(reactor.callLater(10, newRequest, 'www.ietf.org', '/rfc/rfc0048.txt', 5, 41696))
+        self.pending_calls.append(reactor.callLater(30, newRequest, 'www.ietf.org', '/rfc/rfc0022.txt', 6, 4606))
         self.pending_calls.append(reactor.callLater(31, newRequest, 'www.sfu.ca', '/studentcentral/index.html', 7, None))
         self.pending_calls.append(reactor.callLater(31, newRequest, 'www.sfu.ca', '/studentcentral/index.html', 7, None))
-        self.pending_calls.append(reactor.callLater(32, newRequest, 'www.camrdale.org', '/openid.html', 8, 2525))
-        self.pending_calls.append(reactor.callLater(32, newRequest, 'www.camrdale.org', '/subpage.html', 9, 2381))
+        self.pending_calls.append(reactor.callLater(32, newRequest, 'www.ietf.org', '/rfc/rfc0014.txt', 8, 27))
+        self.pending_calls.append(reactor.callLater(32, newRequest, 'www.ietf.org', '/rfc/rfc0001.txt', 9, 21088))
         self.pending_calls.append(reactor.callLater(62, newRequest, 'www.google.ca', '/intl/en/options/', 0, None, True))
         return lastDefer
         
         self.pending_calls.append(reactor.callLater(62, newRequest, 'www.google.ca', '/intl/en/options/', 0, None, True))
         return lastDefer
         
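diff --git a/apt_dht/apt_dht.py b/apt_dht/apt_dht.py
index 264be0f65aba5e499425a9d83feb33a4e0e3d9cc..9d0241d6b0f023861d0fc375aa362f4e250992d8 100644 (file)
--- a/apt_dht/apt_dht.py
+++ b/apt_dht/apt_dht.py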
@@ -52,7 +52,7 @@ class AptDHT:
     
     def check_freshness(self, req, path, modtime, resp):
         log.msg('Checking if %s is still fresh' % path)
     
     def check_freshness(self, req, path, modtime, resp):
         log.msg('Checking if %s is still fresh' % path)
-        d = self.peers.get([path], "HEAD", modtime)
+        d = self.peers.get('', path, method = "HEAD", modtime = modtime)
         d.addCallback(self.check_freshness_done, req, path, resp)
         return d
     
         d.addCallback(self.check_freshness_done, req, path, resp)
         return d
     
@@ -132,14 +132,14 @@ class AptDHT:
     def lookupHash_done(self, locations, hash, path, d):
         if not locations:
             log.msg('Peers for %s were not found' % path)
     def lookupHash_done(self, locations, hash, path, d):
         if not locations:
             log.msg('Peers for %s were not found' % path)
-            getDefer = self.peers.get([path])
+            getDefer = self.peers.get(hash, path)
             getDefer.addCallback(self.cache.save_file, hash, path)
             getDefer.addErrback(self.cache.save_error, path)
             getDefer.addCallbacks(d.callback, d.errback)
         else:
             log.msg('Found peers for %s: %r' % (path, locations))
             # Download from the found peers
             getDefer.addCallback(self.cache.save_file, hash, path)
             getDefer.addErrback(self.cache.save_error, path)
             getDefer.addCallbacks(d.callback, d.errback)
         else:
             log.msg('Found peers for %s: %r' % (path, locations))
             # Download from the found peers
-            getDefer = self.peers.get(locations)
+            getDefer = self.peers.get(hash, path, locations)
             getDefer.addCallback(self.check_response, hash, path)
             getDefer.addCallback(self.cache.save_file, hash, path)
             getDefer.addErrback(self.cache.save_error, path)
             getDefer.addCallback(self.check_response, hash, path)
             getDefer.addCallback(self.cache.save_file, hash, path)
             getDefer.addErrback(self.cache.save_error, path)
@@ -148,7 +148,7 @@ class AptDHT:
     def check_response(self, response, hash, path):
         if response.code < 200 or response.code >= 300:
             log.msg('Download from peers failed, going to direct download: %s' % path)
     def check_response(self, response, hash, path):
         if response.code < 200 or response.code >= 300:
             log.msg('Download from peers failed, going to direct download: %s' % path)
-            getDefer = self.peers.get([path])
+            getDefer = self.peers.get(hash, path)
             return getDefer
         return response
         
             return getDefer
         return response