Download from peers using the hash instead of a directory location.

author Cameron Dale <camrdale@gmail.com>

Mon, 18 Feb 2008 23:50:48 +0000 (15:50 -0800)

committer Cameron Dale <camrdale@gmail.com>

Mon, 18 Feb 2008 23:50:48 +0000 (15:50 -0800)
author Cameron Dale <camrdale@gmail.com>
Mon, 18 Feb 2008 23:50:48 +0000 (15:50 -0800)
committer Cameron Dale <camrdale@gmail.com>
Mon, 18 Feb 2008 23:50:48 +0000 (15:50 -0800)
diff --git a/TODO b/TODO

index 798214179d83abee242a015c409e08c5e6da88e9..5f9555b75acc1b733bf3a03383e848e20135552e 100644 (file)
--- a/TODO
+++ b/TODO
@@ -22,16 +22,6 @@ distributions. They need to either be ignored, or dealt with properly by
  adding them to the tracking done by the AptPackages module.
  
  
-Change file identifier from path to hash.
-
-Some files can change without changing the path, since the file was 
-added to the DHT by the peer. Examples are Release, Packages.gz, and 
-Sources.bz2. This would cause problems when requesting these files by 
-path. Instead, share the files by hash, then the request would be for 
-http://127.3.45.9:9977/~<urlencodedHash>, and it would always work. This 
-will require a database lookup for every request.
-
-
  PeerManager needs to download large files from multiple peers.
  
  The PeerManager currently chooses a peer at random from the list of 
diff --git a/apt_dht/HTTPDownloader.py b/apt_dht/HTTPDownloader.py

index c906c0e87d133a52310dd91800449af90c95a2e4..e028a3b5cc4a6176027e3f2cdcb3181535ce8c07 100644 (file)
--- a/apt_dht/HTTPDownloader.py
+++ b/apt_dht/HTTPDownloader.py
@@ -146,25 +146,25 @@ class TestClientManager(unittest.TestCase):
          stream_mod.readStream(resp.stream, print_).addCallback(printdone)
      
      def test_download(self):
-        host = 'www.camrdale.org'
+        host = 'www.ietf.org'
          self.client = HTTPClientManager(host, 80)
          self.timeout = 10
          
-        d = self.client.get('/robots.txt')
-        d.addCallback(self.gotResp, 1, 309)
+        d = self.client.get('/rfc/rfc0013.txt')
+        d.addCallback(self.gotResp, 1, 1070)
          return d
          
      def test_head(self):
-        host = 'www.camrdale.org'
+        host = 'www.ietf.org'
          self.client = HTTPClientManager(host, 80)
          self.timeout = 10
          
-        d = self.client.get('/robots.txt', "HEAD")
+        d = self.client.get('/rfc/rfc0013.txt', "HEAD")
          d.addCallback(self.gotResp, 1, 0)
          return d
          
      def test_multiple_downloads(self):
-        host = 'www.camrdale.org'
+        host = 'www.ietf.org'
          self.client = HTTPClientManager(host, 80)
          self.timeout = 120
          lastDefer = defer.Deferred()
@@ -175,20 +175,20 @@ class TestClientManager(unittest.TestCase):
              if last:
                  d.addBoth(lastDefer.callback)
                  
-        newRequest("/", 1, 3433)
-        newRequest("/blog/", 2, 39152)
-        newRequest("/camrdale.html", 3, 2234)
-        self.pending_calls.append(reactor.callLater(1, newRequest, '/robots.txt', 4, 309))
-        self.pending_calls.append(reactor.callLater(10, newRequest, '/wikilink.html', 5, 3084))
-        self.pending_calls.append(reactor.callLater(30, newRequest, '/sitemap.html', 6, 4756))
-        self.pending_calls.append(reactor.callLater(31, newRequest, '/PlanetLab.html', 7, 2783))
-        self.pending_calls.append(reactor.callLater(32, newRequest, '/openid.html', 8, 2525))
-        self.pending_calls.append(reactor.callLater(32, newRequest, '/subpage.html', 9, 2381))
-        self.pending_calls.append(reactor.callLater(62, newRequest, '/sitemap2.rss', 0, 313470, True))
+        newRequest("/rfc/rfc0006.txt", 1, 1776)
+        newRequest("/rfc/rfc2362.txt", 2, 159833)
+        newRequest("/rfc/rfc0801.txt", 3, 40824)
+        self.pending_calls.append(reactor.callLater(1, newRequest, '/rfc/rfc0013.txt', 4, 1070))
+        self.pending_calls.append(reactor.callLater(10, newRequest, '/rfc/rfc0022.txt', 5, 4606))
+        self.pending_calls.append(reactor.callLater(30, newRequest, '/rfc/rfc0048.txt', 6, 41696))
+        self.pending_calls.append(reactor.callLater(31, newRequest, '/rfc/rfc3261.txt', 7, 647976))
+        self.pending_calls.append(reactor.callLater(32, newRequest, '/rfc/rfc0014.txt', 8, 27))
+        self.pending_calls.append(reactor.callLater(32, newRequest, '/rfc/rfc0001.txt', 9, 21088))
+        self.pending_calls.append(reactor.callLater(62, newRequest, '/rfc/rfc2801.txt', 0, 598794, True))
          return lastDefer
          
      def test_multiple_quick_downloads(self):
-        host = 'www.camrdale.org'
+        host = 'www.ietf.org'
          self.client = HTTPClientManager(host, 80)
          self.timeout = 30
          lastDefer = defer.Deferred()
@@ -199,24 +199,24 @@ class TestClientManager(unittest.TestCase):
              if last:
                  d.addBoth(lastDefer.callback)
                  
-        newRequest("/", 1, 3433)
-        newRequest("/blog/", 2, 39152)
-        newRequest("/camrdale.html", 3, 2234)
-        self.pending_calls.append(reactor.callLater(0, newRequest, '/robots.txt', 4, 309))
-        self.pending_calls.append(reactor.callLater(0, newRequest, '/wikilink.html', 5, 3084))
-        self.pending_calls.append(reactor.callLater(0, newRequest, '/sitemap.html', 6, 4756))
-        self.pending_calls.append(reactor.callLater(0, newRequest, '/PlanetLab.html', 7, 2783))
-        self.pending_calls.append(reactor.callLater(0, newRequest, '/openid.html', 8, 2525))
-        self.pending_calls.append(reactor.callLater(0, newRequest, '/subpage.html', 9, 2381))
-        self.pending_calls.append(reactor.callLater(0, newRequest, '/sitemap2.rss', 0, 313470, True))
+        newRequest("/rfc/rfc0006.txt", 1, 1776)
+        newRequest("/rfc/rfc2362.txt", 2, 159833)
+        newRequest("/rfc/rfc0801.txt", 3, 40824)
+        self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0013.txt', 4, 1070))
+        self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0022.txt', 5, 4606))
+        self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0048.txt', 6, 41696))
+        self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc3261.txt', 7, 647976))
+        self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0014.txt', 8, 27))
+        self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc0001.txt', 9, 21088))
+        self.pending_calls.append(reactor.callLater(0, newRequest, '/rfc/rfc2801.txt', 0, 598794, True))
          return lastDefer
          
      def test_range(self):
-        host = 'www.camrdale.org'
+        host = 'www.ietf.org'
          self.client = HTTPClientManager(host, 80)
          self.timeout = 10
          
-        d = self.client.getRange('/robots.txt', 100, 199)
+        d = self.client.getRange('/rfc/rfc0013.txt', 100, 199)
          d.addCallback(self.gotResp, 1, 100)
          return d
          
diff --git a/apt_dht/PeerManager.py b/apt_dht/PeerManager.py

index 75c135d2b946c0dca0c26daaf9d4a3e2a0088f61..8c193b7f453f9a79608f6647d6aa2214441918c1 100644 (file)
--- a/apt_dht/PeerManager.py
+++ b/apt_dht/PeerManager.py
@@ -14,18 +14,23 @@ class PeerManager:
      def __init__(self):
          self.clients = {}
          
-    def get(self, locations, method="GET", modtime=None):
-        """Download from a list of peers.
+    def get(self, hash, mirror, peers = [], method="GET", modtime=None):
+        """Download from a list of peers or fallback to a mirror.
          
-        @type locations: C{list} of C{string}
-        @var locations: a list of the locations where the file can be found
+        @type peers: C{list} of C{string}
+        @param peers: a list of the peers where the file can be found
          """
-        url = choice(locations)
-        log.msg('Downloading %s' % url)
-        parsed = urlparse(url)
-        assert parsed[0] == "http", "Only HTTP is supported, not '%s'" % parsed[0]
-        host, port = splitHostPort(parsed[0], parsed[1])
-        path = urlunparse(('', '') + parsed[2:])
+        if peers:
+            peer = choice(peers)
+            log.msg('Downloading from peer %s' % peer)
+            host, port = splitHostPort('http', peer)
+            path = '/~/' + hash
+        else:
+            log.msg('Downloading (%s) from mirror %s' % (method, mirror))
+            parsed = urlparse(mirror)
+            assert parsed[0] == "http", "Only HTTP is supported, not '%s'" % parsed[0]
+            host, port = splitHostPort(parsed[0], parsed[1])
+            path = urlunparse(('', '') + parsed[2:])
  
          return self.getPeer(host, port, path, method, modtime)
          
@@ -62,17 +67,17 @@ class TestPeerManager(unittest.TestCase):
          self.manager = PeerManager()
          self.timeout = 10
          
-        host = 'www.camrdale.org'
-        d = self.manager.get(['http://' + host + '/robots.txt'])
-        d.addCallback(self.gotResp, 1, 309)
+        host = 'www.ietf.org'
+        d = self.manager.get('', 'http://' + host + '/rfc/rfc0013.txt')
+        d.addCallback(self.gotResp, 1, 1070)
          return d
          
      def test_head(self):
          self.manager = PeerManager()
          self.timeout = 10
          
-        host = 'www.camrdale.org'
-        d = self.manager.get(['http://' + host + '/robots.txt'], "HEAD")
+        host = 'www.ietf.org'
+        d = self.manager.get('', 'http://' + host + '/rfc/rfc0013.txt', method = "HEAD")
          d.addCallback(self.gotResp, 1, 0)
          return d
          
@@ -82,20 +87,20 @@ class TestPeerManager(unittest.TestCase):
          lastDefer = defer.Deferred()
          
          def newRequest(host, path, num, expect, last=False):
-            d = self.manager.get(['http://' + host + ':' + str(80) + path])
+            d = self.manager.get('', 'http://' + host + ':' + str(80) + path)
              d.addCallback(self.gotResp, num, expect)
              if last:
                  d.addBoth(lastDefer.callback)
                  
-        newRequest('www.camrdale.org', "/", 1, 3433)
-        newRequest('www.camrdale.org', "/blog/", 2, 39152)
+        newRequest('www.ietf.org', "/rfc/rfc0006.txt", 1, 1776)
+        newRequest('www.ietf.org', "/rfc/rfc2362.txt", 2, 159833)
          newRequest('www.google.ca', "/", 3, None)
          self.pending_calls.append(reactor.callLater(1, newRequest, 'www.sfu.ca', '/', 4, None))
-        self.pending_calls.append(reactor.callLater(10, newRequest, 'www.camrdale.org', '/wikilink.html', 5, 3084))
-        self.pending_calls.append(reactor.callLater(30, newRequest, 'www.camrdale.org', '/sitemap.html', 6, 4756))
+        self.pending_calls.append(reactor.callLater(10, newRequest, 'www.ietf.org', '/rfc/rfc0048.txt', 5, 41696))
+        self.pending_calls.append(reactor.callLater(30, newRequest, 'www.ietf.org', '/rfc/rfc0022.txt', 6, 4606))
          self.pending_calls.append(reactor.callLater(31, newRequest, 'www.sfu.ca', '/studentcentral/index.html', 7, None))
-        self.pending_calls.append(reactor.callLater(32, newRequest, 'www.camrdale.org', '/openid.html', 8, 2525))
-        self.pending_calls.append(reactor.callLater(32, newRequest, 'www.camrdale.org', '/subpage.html', 9, 2381))
+        self.pending_calls.append(reactor.callLater(32, newRequest, 'www.ietf.org', '/rfc/rfc0014.txt', 8, 27))
+        self.pending_calls.append(reactor.callLater(32, newRequest, 'www.ietf.org', '/rfc/rfc0001.txt', 9, 21088))
          self.pending_calls.append(reactor.callLater(62, newRequest, 'www.google.ca', '/intl/en/options/', 0, None, True))
          return lastDefer
          
diff --git a/apt_dht/apt_dht.py b/apt_dht/apt_dht.py

index 264be0f65aba5e499425a9d83feb33a4e0e3d9cc..9d0241d6b0f023861d0fc375aa362f4e250992d8 100644 (file)
--- a/apt_dht/apt_dht.py
+++ b/apt_dht/apt_dht.py
@@ -52,7 +52,7 @@ class AptDHT:
      
      def check_freshness(self, req, path, modtime, resp):
          log.msg('Checking if %s is still fresh' % path)
-        d = self.peers.get([path], "HEAD", modtime)
+        d = self.peers.get('', path, method = "HEAD", modtime = modtime)
          d.addCallback(self.check_freshness_done, req, path, resp)
          return d
      
@@ -132,14 +132,14 @@ class AptDHT:
      def lookupHash_done(self, locations, hash, path, d):
          if not locations:
              log.msg('Peers for %s were not found' % path)
-            getDefer = self.peers.get([path])
+            getDefer = self.peers.get(hash, path)
              getDefer.addCallback(self.cache.save_file, hash, path)
              getDefer.addErrback(self.cache.save_error, path)
              getDefer.addCallbacks(d.callback, d.errback)
          else:
              log.msg('Found peers for %s: %r' % (path, locations))
              # Download from the found peers
-            getDefer = self.peers.get(locations)
+            getDefer = self.peers.get(hash, path, locations)
              getDefer.addCallback(self.check_response, hash, path)
              getDefer.addCallback(self.cache.save_file, hash, path)
              getDefer.addErrback(self.cache.save_error, path)
@@ -148,7 +148,7 @@ class AptDHT:
      def check_response(self, response, hash, path):
          if response.code < 200 or response.code >= 300:
              log.msg('Download from peers failed, going to direct download: %s' % path)
-            getDefer = self.peers.get([path])
+            getDefer = self.peers.get(hash, path)
              return getDefer
          return response
author	Cameron Dale <camrdale@gmail.com>
	Mon, 18 Feb 2008 23:50:48 +0000 (15:50 -0800)
committer	Cameron Dale <camrdale@gmail.com>
	Mon, 18 Feb 2008 23:50:48 +0000 (15:50 -0800)
TODO		patch | blob | history
apt_dht/HTTPDownloader.py		patch | blob | history
apt_dht/PeerManager.py		patch | blob | history
apt_dht/apt_dht.py		patch | blob | history