Download from peers using the hash instead of a directory location.
author Cameron Dale <camrdale@gmail.com>
Mon, 18 Feb 2008 23:50:48 +0000 (15:50 -0800)
committer Cameron Dale <camrdale@gmail.com>
Mon, 18 Feb 2008 23:50:48 +0000 (15:50 -0800)
Also updated the HTTP downloading tests to use the rarely changing
IETF RFCs instead of www.camrdale.org.

TODO
apt_dht/HTTPDownloader.py
apt_dht/PeerManager.py
apt_dht/apt_dht.py

diff --git a/TODO b/TODO
index 7982141..5f9555b 100644 (file)
--- a/TODO
+++ b/TODO
@@ -22,16 +22,6 @@ distributions. They need to either be ignored, or dealt with properly by
 adding them to the tracking done by the AptPackages module.
 
 
-Change file identifier from path to hash.
-
-Some files can change without changing the path, since the file was 
-added to the DHT by the peer. Examples are Release, Packages.gz, and 
-Sources.bz2. This would cause problems when requesting these files by 
-path. Instead, share the files by hash, then the request would be for 
-http://127.3.45.9:9977/~<urlencodedHash>, and it would always work. This 
-will require a database lookup for every request.
-
-
 PeerManager needs to download large files from multiple peers.
 
 The PeerManager currently chooses a peer at random from the list of 
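diff --git a/apt_dht/HTTPDownloader.py b/apt_dht/HTTPDownloader.py
index c906c0e..e028a3b 100644 (file)
--- a/apt_dht/HTTPDownloader.py
+++ b/apt_dht/HTTPDownloader.py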
@@ -146,25 +146,25 @@ class TestClientManager(unittest.TestCase):
         stream_mod.readStream(resp.stream, print_).addCallback(printdone)
     
     def test_download(self):
-        host = 'www.camrdale.org'
+        host = 'www.ietf.org'
         self.client = HTTPClientManager(host, 80)
         self.timeout = 10
         
-        d = self.client.get('/robots.txt')
-        d.addCallback(self.gotResp, 1, 309)
+        d = self.client.get('/rfc/rfc0013.txt')
+        d.addCallback(self.gotResp, 1, 1070)
         return d
         
     def test_head(self):
-        host = 'www.camrdale.org'
+        host = 'www.ietf.org'
         self.client = HTTPClientManager(host, 80)
         self.timeout = 10
         
-        d = self.client.get('/robots.txt', "HEAD")
+        d = self.client.get('/rfc/rfc0013.txt', "HEAD")
         d.addCallback(self.gotResp, 1, 0)
         return d
         
     def test_multiple_downloads(self):
-        host = 'www.camrdale.org'
+        host = 'www.ietf.org'
         self.client = HTTPClientManager(host, 80)
         self.timeout = 120
         lastDefer = defer.Deferred()
@@ -175,20 +175,20 @@ class TestClientManager(unittest.TestCase):
             if last:
                 d.addBoth(lastDefer.callback)
                 
-        newRequest("/", 1, 3433)
-        newRequest("/blog/", 2, 39152)
-        newRequest("/camrdale.html", 3, 2234)
-        self.pending_calls.append(reactor.callLater(1, newRequest, '/robots.txt', 4, 309))
-        self.pending_calls.append(reactor.callLater(10, newRequest, '/wikilink.html', 5, 3084))
-        self.pending_calls.append(reactor.callLater(30, newRequest, '/sitemap.html', 6, 4756))
-        self.pending_calls.append(reactor.callLater(31, newRequest, '/PlanetLab.html', 7, 2783))
-        self.pending_calls.append(reactor.callLater(32, newRequest, '/openid.html', 8, 2525))
-        self.pending_calls.append(reactor.callLater(32, newRequest, '/subpage.html', 9, 2381))
-        self.pending_calls.append(reactor.callLater(62, newRequest, '/sitemap2.rss', 0, 313470, True))
+        newRequest("/rfc/rfc0006.txt", 1, 1776)
+        newRequest("/rfc/rfc2362.txt", 2, 159833)
+        newRequest("/rfc/rfc0801.txt", 3, 40824)
+        self.pending_calls.append(reactor.callLater(1, newRequest, '/rfc/rfc0013.txt', 4, 1070))
+        self.pending_calls.append(reactor.callLater(10, newRequest, '/rfc/rfc0022.txt', 5, 4606))
+        self.pending_calls.append(reactor.callLater(30, newRequest, '/rfc/rfc0048.txt', 6, 41696))
+        self.pending_calls.append(reactor.callLater(31, newRequest, '/rfc/rfc3261.txt', 7, 647976))
+        self.pending_calls.append(reactor.callLater(32, newRequest, '/rfc/rfc0014.txt', 8, 27))
+        self.pending_calls.append(reactor.callLater(32, newRequest, '/rfc/rfc0001.txt', 9, 21088))
+        self.pending_calls.append(reactor.callLater(62, newRequest, '/rfc/rfc2801.txt', 0, 598794, True))
         return lastDefer
         
     def test_multiple_quick_downloads(self):
-        host = 'www.camrdale.org'
+        host = 'www.ietf.org'
         self.client = HTTPClientManager(host, 80)
         self.timeout = 30
         lastDefer = defer.Deferred()
@@ -199,24 +199,24 @@ class TestClientManager(unittest.TestCase):
             if last:
                 d.addBoth(lastDefer.callback)
                 
-        newRequest("/", 1, 3433)
-        newRequest("/blog/", 2, 39152)
-        newRequest("/camrdale.html", 3, 2234)
-        self.pending_calls.append(reactor.callLater(0, newRequest, '/robots.txt', 4, 309))
-        self.pending_calls.append(reactor.callLater(0, newRequest, '/wikilink.html', 5, 3084))
-        self.pending_calls.append(reactor.callLater(0, newRequest, '/sitemap.html', 6, 4756))
-        self.pending_calls.append(reactor.callLater(0, newRequest, '/PlanetLab.html', 7, 2783))
-        self.pending_calls.append(reactor.callLater(0, newRequest, '/openid.html', 8, 2525))
-        self.pending_calls.append(reactor.callLater(0, newRequest, '/subpage.html', 9, 2381))
-        self.pending_calls.append(reactor.callLater(0, newRequest, '/sitemap2.rss', 0, 313470, True))
+        newRequest("/rfc/rfc0006.txt", 1, 1776)
+        newRequest("/rfc/rfc2362.txt", 2, 159833)
+        newRequest("/rfc/rfc0801.txt", 3, 40824)
+        self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0013.txt', 4, 1070))
+        self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0022.txt', 5, 4606))
+        self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0048.txt', 6, 41696))
+        self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc3261.txt', 7, 647976))
+        self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0014.txt', 8, 27))
+        self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0001.txt', 9, 21088))
+        self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc2801.txt', 0, 598794, True))
         return lastDefer
         
     def test_range(self):
-        host = 'www.camrdale.org'
+        host = 'www.ietf.org'
         self.client = HTTPClientManager(host, 80)
         self.timeout = 10
         
-        d = self.client.getRange('/robots.txt', 100, 199)
+        d = self.client.getRange('/rfc/rfc0013.txt', 100, 199)
         d.addCallback(self.gotResp, 1, 100)
         return d
         
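diff --git a/apt_dht/PeerManager.py b/apt_dht/PeerManager.py
index 75c135d..8c193b7 100644 (file)
--- a/apt_dht/PeerManager.py
+++ b/apt_dht/PeerManager.py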
@@ -14,18 +14,23 @@ class PeerManager:
     def __init__(self):
         self.clients = {}
         
-    def get(self, locations, method="GET", modtime=None):
-        """Download from a list of peers.
+    def get(self, hash, mirror, peers = [], method="GET", modtime=None):
+        """Download from a list of peers or fallback to a mirror.
         
-        @type locations: C{list} of C{string}
-        @var locations: a list of the locations where the file can be found
+        @type peers: C{list} of C{string}
+        @param peers: a list of the peers where the file can be found
         """
-        url = choice(locations)
-        log.msg('Downloading %s' % url)
-        parsed = urlparse(url)
-        assert parsed[0] == "http", "Only HTTP is supported, not '%s'" % parsed[0]
-        host, port = splitHostPort(parsed[0], parsed[1])
-        path = urlunparse(('', '') + parsed[2:])
+        if peers:
+            peer = choice(peers)
+            log.msg('Downloading from peer %s' % peer)
+            host, port = splitHostPort('http', peer)
+            path = '/~/' + hash
+        else:
+            log.msg('Downloading (%s) from mirror %s' % (method, mirror))
+            parsed = urlparse(mirror)
+            assert parsed[0] == "http", "Only HTTP is supported, not '%s'" % parsed[0]
+            host, port = splitHostPort(parsed[0], parsed[1])
+            path = urlunparse(('', '') + parsed[2:])
 
         return self.getPeer(host, port, path, method, modtime)
         
@@ -62,17 +67,17 @@ class TestPeerManager(unittest.TestCase):
         self.manager = PeerManager()
         self.timeout = 10
         
-        host = 'www.camrdale.org'
-        d = self.manager.get(['http://' + host + '/robots.txt'])
-        d.addCallback(self.gotResp, 1, 309)
+        host = 'www.ietf.org'
+        d = self.manager.get('', 'http://' + host + '/rfc/rfc0013.txt')
+        d.addCallback(self.gotResp, 1, 1070)
         return d
         
     def test_head(self):
         self.manager = PeerManager()
         self.timeout = 10
         
-        host = 'www.camrdale.org'
-        d = self.manager.get(['http://' + host + '/robots.txt'], "HEAD")
+        host = 'www.ietf.org'
+        d = self.manager.get('', 'http://' + host + '/rfc/rfc0013.txt', method = "HEAD")
         d.addCallback(self.gotResp, 1, 0)
         return d
         
@@ -82,20 +87,20 @@ class TestPeerManager(unittest.TestCase):
         lastDefer = defer.Deferred()
         
         def newRequest(host, path, num, expect, last=False):
-            d = self.manager.get(['http://' + host + ':' + str(80) + path])
+            d = self.manager.get('', 'http://' + host + ':' + str(80) + path)
             d.addCallback(self.gotResp, num, expect)
             if last:
                 d.addBoth(lastDefer.callback)
                 
-        newRequest('www.camrdale.org', "/", 1, 3433)
-        newRequest('www.camrdale.org', "/blog/", 2, 39152)
+        newRequest('www.ietf.org', "/rfc/rfc0006.txt", 1, 1776)
+        newRequest('www.ietf.org', "/rfc/rfc2362.txt", 2, 159833)
         newRequest('www.google.ca', "/", 3, None)
         self.pending_calls.append(reactor.callLater(1, newRequest, 'www.sfu.ca', '/', 4, None))
-        self.pending_calls.append(reactor.callLater(10, newRequest, 'www.camrdale.org', '/wikilink.html', 5, 3084))
-        self.pending_calls.append(reactor.callLater(30, newRequest, 'www.camrdale.org', '/sitemap.html', 6, 4756))
+        self.pending_calls.append(reactor.callLater(10, newRequest, 'www.ietf.org', '/rfc/rfc0048.txt', 5, 41696))
+        self.pending_calls.append(reactor.callLater(30, newRequest, 'www.ietf.org', '/rfc/rfc0022.txt', 6, 4606))
         self.pending_calls.append(reactor.callLater(31, newRequest, 'www.sfu.ca', '/studentcentral/index.html', 7, None))
-        self.pending_calls.append(reactor.callLater(32, newRequest, 'www.camrdale.org', '/openid.html', 8, 2525))
-        self.pending_calls.append(reactor.callLater(32, newRequest, 'www.camrdale.org', '/subpage.html', 9, 2381))
+        self.pending_calls.append(reactor.callLater(32, newRequest, 'www.ietf.org', '/rfc/rfc0014.txt', 8, 27))
+        self.pending_calls.append(reactor.callLater(32, newRequest, 'www.ietf.org', '/rfc/rfc0001.txt', 9, 21088))
         self.pending_calls.append(reactor.callLater(62, newRequest, 'www.google.ca', '/intl/en/options/', 0, None, True))
         return lastDefer
         
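diff --git a/apt_dht/apt_dht.py b/apt_dht/apt_dht.py
index 264be0f..9d0241d 100644 (file)
--- a/apt_dht/apt_dht.py
+++ b/apt_dht/apt_dht.py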
@@ -52,7 +52,7 @@ class AptDHT:
     
     def check_freshness(self, req, path, modtime, resp):
         log.msg('Checking if %s is still fresh' % path)
-        d = self.peers.get([path], "HEAD", modtime)
+        d = self.peers.get('', path, method = "HEAD", modtime = modtime)
         d.addCallback(self.check_freshness_done, req, path, resp)
         return d
     
@@ -132,14 +132,14 @@ class AptDHT:
     def lookupHash_done(self, locations, hash, path, d):
         if not locations:
             log.msg('Peers for %s were not found' % path)
-            getDefer = self.peers.get([path])
+            getDefer = self.peers.get(hash, path)
             getDefer.addCallback(self.cache.save_file, hash, path)
             getDefer.addErrback(self.cache.save_error, path)
             getDefer.addCallbacks(d.callback, d.errback)
         else:
             log.msg('Found peers for %s: %r' % (path, locations))
             # Download from the found peers
-            getDefer = self.peers.get(locations)
+            getDefer = self.peers.get(hash, path, locations)
             getDefer.addCallback(self.check_response, hash, path)
             getDefer.addCallback(self.cache.save_file, hash, path)
             getDefer.addErrback(self.cache.save_error, path)
@@ -148,7 +148,7 @@ class AptDHT:
     def check_response(self, response, hash, path):
         if response.code < 200 or response.code >= 300:
             log.msg('Download from peers failed, going to direct download: %s' % path)
-            getDefer = self.peers.get([path])
+            getDefer = self.peers.get(hash, path)
             return getDefer
         return response