from twisted.python import log
from twisted.trial import unittest
from twisted.web2 import stream
-from twisted.web2.http import splitHostPort
+from twisted.web2.http import Response, splitHostPort
from HTTPDownloader import Peer
from util import uncompact
-from hash import PIECE_SIZE
+from Hash import PIECE_SIZE
from apt_p2p_Khashmir.bencode import bdecode
class GrowingFileStream(stream.FileStream):
CHUNK_SIZE = 4*1024
- def __init__(self, f):
+ def __init__(self, f, length = None):
stream.FileStream.__init__(self, f)
- self.length = None
+ self.length = length
self.deferred = None
self.available = 0L
self.position = 0L
self.finished = False
- def updateAvaliable(self, newlyAvailable):
+ def updateAvailable(self, newlyAvailable):
"""Update the number of bytes that are available.
Call it with 0 to trigger reading of a fully read file.
deferred.callback(b)
def allAvailable(self):
- """Indicate that no more data is coming available."""
+ """Indicate that no more data will be coming available."""
self.finished = True
# If a read is pending, let it go
deferred.callback(b)
else:
# We're done
+ deferred = self.deferred
+ self.deferred = None
deferred.callback(None)
else:
# We're done
+ deferred = self.deferred
+ self.deferred = None
deferred.callback(None)
def read(self, sendfile=False):
self.position += bytesRead
return b
-class StreamToFile(defer.Deferred):
- """Saves a stream to a file.
+class StreamToFile:
+ """Save a stream to a partial file and hash it.
@type stream: L{twisted.web2.stream.IByteStream}
@ivar stream: the input stream being read
@type outFile: L{twisted.python.filepath.FilePath}
@ivar outFile: the file being written
- @type hash: L{Hash.HashObject}
- @ivar hash: the hash object for the file
+ @type hash: C{sha1}
+ @ivar hash: the hash object for the data
+ @type position: C{int}
+ @ivar position: the current file position to write the next data to
@type length: C{int}
- @ivar length: the length of the original (compressed) file
+ @ivar length: the position in the file to not write beyond
@type doneDefer: L{twisted.internet.defer.Deferred}
- @ivar doneDefer: the deferred that will fire when done streaming
+ @ivar doneDefer: the deferred that will fire when done writing
"""
- def __init__(self, inputStream, outFile, hash, start, length):
+ def __init__(self, inputStream, outFile, start = 0, length = None):
"""Initializes the file.
@type inputStream: L{twisted.web2.stream.IByteStream}
@param inputStream: the input stream to read from
@type outFile: L{twisted.python.filepath.FilePath}
@param outFile: the file to write to
- @type hash: L{Hash.HashObject}
- @param hash: the hash object to use for the file
+ @type start: C{int}
+ @param start: the file position to start writing at
+ (optional, defaults to the start of the file)
+ @type length: C{int}
+ @param length: the maximum amount of data to write to the file
+            (optional, defaults to not limiting the writing to the file)
"""
self.stream = inputStream
- self.outFile = outFile.open('w')
- self.hash = hash
- self.hash.new()
- self.length = self.stream.length
+ self.outFile = outFile
+ self.hash = sha.new()
+ self.position = start
+ self.length = None
+ if length is not None:
+ self.length = start + length
+ self.doneDefer = None
def run(self):
- """Start the streaming."""
- self.doneDefer = stream.readStream(self.stream, _gotData)
+ """Start the streaming.
+
+ @rtype: L{twisted.internet.defer.Deferred}
+ """
+ log.msg('Started streaming %r bytes to file at position %d' % (self.length, self.position))
+ self.doneDefer = stream.readStream(self.stream, self._gotData)
self.doneDefer.addCallbacks(self._done, self._error)
return self.doneDefer
- def _done(self):
- """Close all the output files, return the result."""
- if not self.outFile.closed:
- self.outFile.close()
- self.hash.digest()
- self.doneDefer.callback(self.hash)
-
def _gotData(self, data):
- self.peers[site]['pieces'] += data
-
- def read(self):
- """Read some data from the stream."""
+ """Process the received data."""
if self.outFile.closed:
- return None
+ raise Exception, "outFile was unexpectedly closed"
- # Read data from the stream, deal with the possible deferred
- data = self.stream.read()
- if isinstance(data, defer.Deferred):
- data.addCallbacks(self._write, self._done)
- return data
-
- self._write(data)
- return data
-
- def _write(self, data):
- """Write the stream data to the file and return it for others to use.
-
- Also optionally decompresses it.
- """
if data is None:
- self._done()
- return data
+ raise Exception, "Data is None?"
+
+ # Make sure we don't go too far
+ if self.length is not None and self.position + len(data) > self.length:
+ data = data[:(self.length - self.position)]
# Write and hash the streamed data
+ self.outFile.seek(self.position)
self.outFile.write(data)
self.hash.update(data)
+ self.position += len(data)
- return data
+ def _done(self, result):
+ """Return the result."""
+ log.msg('Streaming is complete')
+ return self.hash.digest()
+
+ def _error(self, err):
+ """Log the error."""
+ log.msg('Streaming error')
+ log.err(err)
+ return err
- def close(self):
- """Clean everything up and return None to future reads."""
- self.length = 0
- self._done()
- self.stream.close()
-
-
class FileDownload:
"""Manage a download from a list of peers or a mirror.
-
+ @type manager: L{PeerManager}
+ @ivar manager: the manager to send requests for peers to
+ @type hash: L{Hash.HashObject}
+ @ivar hash: the hash object containing the expected hash for the file
+ @ivar mirror: the URI of the file on the mirror
+ @type compact_peers: C{list} of C{dictionary}
+ @ivar compact_peers: a list of the peer info where the file can be found
+ @type file: C{file}
+    @ivar file: the open file to write the download to
+ @type path: C{string}
+ @ivar path: the path to request from peers to access the file
+ @type pieces: C{list} of C{string}
+ @ivar pieces: the hashes of the pieces in the file
+ @type started: C{boolean}
+ @ivar started: whether the download has begun yet
+ @type defer: L{twisted.internet.defer.Deferred}
+ @ivar defer: the deferred that will callback with the result of the download
+ @type peers: C{dictionary}
+ @ivar peers: information about each of the peers available to download from
+ @type outstanding: C{int}
+ @ivar outstanding: the number of requests to peers currently outstanding
+ @type peerlist: C{list} of L{HTTPDownloader.Peer}
+ @ivar peerlist: the sorted list of peers for this download
+ @type stream: L{GrowingFileStream}
+ @ivar stream: the stream of resulting data from the download
+ @type nextFinish: C{int}
+ @ivar nextFinish: the next piece that is needed to finish for the stream
+ @type completePieces: C{list} of C{boolean} or L{HTTPDownloader.Peer}
+ @ivar completePieces: one per piece, will be False if no requests are
+ outstanding for the piece, True if the piece has been successfully
+ downloaded, or the Peer that a request for this piece has been sent
"""
def __init__(self, manager, hash, mirror, compact_peers, file):
"""Initialize the instance and check for piece hashes.
+ @type manager: L{PeerManager}
+ @param manager: the manager to send requests for peers to
@type hash: L{Hash.HashObject}
@param hash: the hash object containing the expected hash for the file
@param mirror: the URI of the file on the mirror
- @type compact_peers: C{list} of C{string}
+ @type compact_peers: C{list} of C{dictionary}
@param compact_peers: a list of the peer info where the file can be found
@type file: L{twisted.python.filepath.FilePath}
@param file: the temporary file to use to store the downloaded file
file.restat(False)
if file.exists():
file.remove()
- self.file = file.open('w')
+ self.file = file.open('w+')
def run(self):
"""Start the downloading process."""
+ log.msg('Checking for pieces for %s' % self.path)
self.defer = defer.Deferred()
self.peers = {}
no_pieces = 0
- pieces_string = {}
- pieces_hash = {}
- pieces_dl_hash = {}
+ pieces_string = {0: 0}
+ pieces_hash = {0: 0}
+ pieces_dl_hash = {0: 0}
for compact_peer in self.compact_peers:
# Build a list of all the peers for this download
site = uncompact(compact_peer['c'])
- peer = manager.getPeer(site)
+ peer = self.manager.getPeer(site)
self.peers.setdefault(site, {})['peer'] = peer
# Extract any piece information from the peers list
if max_found == no_pieces:
# The file is not split into pieces
+ log.msg('No pieces were found for the file')
self.pieces = []
self.startDownload()
elif max_found == max(pieces_string.values()):
# Find the most popular piece string
if num == max_found:
self.pieces = [pieces[x:x+20] for x in xrange(0, len(pieces), 20)]
+ log.msg('Peer info contained %d piece hashes' % len(self.pieces))
self.startDownload()
break
elif max_found == max(pieces_hash.values()):
for pieces, num in pieces_hash.items():
# Find the most popular piece hash to lookup
if num == max_found:
+ log.msg('Found a hash for pieces to lookup in the DHT: %r' % pieces)
self.getDHTPieces(pieces)
break
elif max_found == max(pieces_dl_hash.values()):
for pieces, num in pieces_hash.items():
# Find the most popular piece hash to download
if num == max_found:
+ log.msg('Found a hash for pieces to lookup in peers: %r' % pieces)
self.getPeerPieces(pieces)
break
return self.defer
@param key: the key to request from the peers
"""
if failedSite is None:
+ log.msg('Starting the lookup of piece hashes in peers')
self.outstanding = 0
# Remove any peers with the wrong piece hash
#for site in self.peers.keys():
# if self.peers[site].get('l', '') != key:
# del self.peers[site]
else:
+ log.msg('Piece hash lookup failed for peer %r' % (failedSite, ))
self.peers[failedSite]['failed'] = True
self.outstanding -= 1
if self.pieces is None:
# Send a request to one or more peers
+ log.msg('Checking for a peer to request piece hashes from')
for site in self.peers:
if self.peers[site].get('failed', False) != True:
+ log.msg('Sending a piece hash request to %r' % (site, ))
path = '/~/' + quote_plus(key)
lookupDefer = self.peers[site]['peer'].get(path)
- lookupDefer.addCallbacks(self._getPeerPieces, self._gotPeerError,
- callbackArgs=(key, site), errbackArgs=(key, site))
+ reactor.callLater(0, lookupDefer.addCallbacks,
+ *(self._getPeerPieces, self._gotPeerError),
+ **{'callbackArgs': (key, site),
+ 'errbackArgs': (key, site)})
self.outstanding += 1
if self.outstanding >= 3:
break
+ log.msg('Done sending piece hash requests for now, %d outstanding' % self.outstanding)
if self.pieces is None and self.outstanding == 0:
# Continue without the piece hashes
log.msg('Could not retrieve the piece hashes from the peers')
def _getPeerPieces(self, response, key, site):
"""Process the retrieved response from the peer."""
+ log.msg('Got a piece hash response %d from %r' % (response.code, site))
if response.code != 200:
# Request failed, try a different peer
+ log.msg('Did not like response %d from %r' % (response.code, site))
self.getPeerPieces(key, site)
else:
# Read the response stream to a string
self.peers[site]['pieces'] = ''
def _gotPeerPiece(data, self = self, site = site):
+ log.msg('Peer %r got %d bytes of piece hashes' % (site, len(data)))
self.peers[site]['pieces'] += data
+ log.msg('Streaming piece hashes from peer')
df = stream.readStream(response.stream, _gotPeerPiece)
df.addCallbacks(self._gotPeerPieces, self._gotPeerError,
callbackArgs=(key, site), errbackArgs=(key, site))
def _gotPeerError(self, err, key, site):
"""Peer failed, try again."""
+ log.msg('Peer piece hash request failed for %r' % (site, ))
log.err(err)
self.getPeerPieces(key, site)
def _gotPeerPieces(self, result, key, site):
"""Check the retrieved pieces from the peer."""
+ log.msg('Finished streaming piece hashes from peer %r' % (site, ))
if self.pieces is not None:
# Already done
+ log.msg('Already done')
return
try:
result = bdecode(self.peers[site]['pieces'])
except:
+ log.msg('Error bdecoding piece hashes')
log.err()
self.getPeerPieces(key, site)
return
if result_hash == key:
pieces = result['t']
self.pieces = [pieces[x:x+20] for x in xrange(0, len(pieces), 20)]
- log.msg('Retrieved %d piece hashes from the peer' % len(self.pieces))
+ log.msg('Retrieved %d piece hashes from the peer %r' % (len(self.pieces), site))
self.startDownload()
else:
log.msg('Peer returned a piece string that did not match')
if self.started:
return
+ log.msg('Starting to download %s' % self.path)
self.started = True
assert self.pieces is not None, "You must initialize the piece hashes first"
self.peerlist = [self.peers[site]['peer'] for site in self.peers]
self.defer.callback(self.peerlist[0].get(self.path))
return
- self.sort()
+ # Start sending the return file
+ self.stream = GrowingFileStream(self.file, self.hash.expSize)
+ resp = Response(200, {}, self.stream)
+ self.defer.callback(resp)
+
+ # Begin to download the pieces
self.outstanding = 0
- self.next_piece = 0
+ self.nextFinish = 0
+ if self.pieces:
+ self.completePieces = [False for piece in self.pieces]
+ else:
+ self.completePieces = [False]
+ self.getPieces()
- while self.outstanding < 3 and self.peerlist and self.next_piece < len(self.pieces):
- peer = self.peerlist.pop()
- piece = self.next_piece
- self.next_piece += 1
-
- self.outstanding += 1
- df = peer.getRange(self.path, piece*PIECE_SIZE, (piece+1)*PIECE_SIZE - 1)
- df.addCallbacks(self._gotPiece, self._gotError,
- callbackArgs=(piece, peer), errbackArgs=(piece, peer))
+ #{ Downloading the pieces
+ def getPieces(self):
+ """Download the next pieces from the peers."""
+ log.msg('Checking for more piece requests to send')
+ self.sort()
+ piece = self.nextFinish
+ while self.outstanding < 4 and self.peerlist and piece < len(self.completePieces):
+ log.msg('Checking piece %d' % piece)
+ if self.completePieces[piece] == False:
+ # Send a request to the highest ranked peer
+ peer = self.peerlist.pop()
+ self.completePieces[piece] = peer
+ log.msg('Sending a request for piece %d to peer %r' % (piece, peer))
+
+ self.outstanding += 1
+ if self.pieces:
+ df = peer.getRange(self.path, piece*PIECE_SIZE, (piece+1)*PIECE_SIZE - 1)
+ else:
+ df = peer.get(self.path)
+ reactor.callLater(0, df.addCallbacks,
+ *(self._getPiece, self._getError),
+ **{'callbackArgs': (piece, peer),
+ 'errbackArgs': (piece, peer)})
+ piece += 1
+
+ log.msg('Finished checking pieces, %d outstanding, next piece %d of %d' % (self.outstanding, self.nextFinish, len(self.completePieces)))
+ # Check if we're done
+ if self.outstanding <= 0 and self.nextFinish >= len(self.completePieces):
+ log.msg('We seem to be done with all pieces')
+ self.stream.allAvailable()
- def _gotPiece(self, response, piece, peer):
- """Process the retrieved piece from the peer."""
- if response.code != 206:
+ def _getPiece(self, response, piece, peer):
+ """Process the retrieved headers from the peer."""
+ log.msg('Got response for piece %d from peer %r' % (piece, peer))
+ if ((len(self.completePieces) > 1 and response.code != 206) or
+ (response.code not in (200, 206))):
# Request failed, try a different peer
- self.getPeerPieces(key, site)
+ log.msg('Wrong response type %d for piece %d from peer %r' % (response.code, piece, peer))
+ peer.hashError('Peer responded with the wrong type of download: %r' % response.code)
+ self.completePieces[piece] = False
+ if response.stream and response.stream.length:
+ stream.readAndDiscard(response.stream)
else:
# Read the response stream to the file
- df = StreamToFile(response.stream, self.file, self.hash, piece*PIECE_SIZE, PIECE_SIZE).run()
- df.addCallbacks(self._gotPeerPieces, self._gotPeerError,
- callbackArgs=(key, site), errbackArgs=(key, site))
+ log.msg('Streaming piece %d from peer %r' % (piece, peer))
+ if response.code == 206:
+ df = StreamToFile(response.stream, self.file, piece*PIECE_SIZE, PIECE_SIZE).run()
+ else:
+ df = StreamToFile(response.stream, self.file).run()
+ df.addCallbacks(self._gotPiece, self._gotError,
+ callbackArgs=(piece, peer), errbackArgs=(piece, peer))
- def _gotError(self, err, piece, peer):
+ self.outstanding -= 1
+ self.peerlist.append(peer)
+ self.getPieces()
+
+ def _getError(self, err, piece, peer):
"""Peer failed, try again."""
+ log.msg('Got error for piece %d from peer %r' % (piece, peer))
+ self.outstanding -= 1
+ self.peerlist.append(peer)
+ self.completePieces[piece] = False
+ self.getPieces()
log.err(err)
+ def _gotPiece(self, response, piece, peer):
+ """Process the retrieved piece from the peer."""
+ log.msg('Finished streaming piece %d from peer %r: %r' % (piece, peer, response))
+ if ((self.pieces and response != self.pieces[piece]) or
+ (len(self.pieces) == 0 and response != self.hash.expected())):
+ # Hash doesn't match
+ log.msg('Hash error for piece %d from peer %r' % (piece, peer))
+ peer.hashError('Piece received from peer does not match expected')
+ self.completePieces[piece] = False
+ elif self.pieces:
+ # Successfully completed one of several pieces
+ log.msg('Finished with piece %d from peer %r' % (piece, peer))
+ self.completePieces[piece] = True
+ while (self.nextFinish < len(self.completePieces) and
+ self.completePieces[self.nextFinish] == True):
+ self.nextFinish += 1
+ self.stream.updateAvailable(PIECE_SIZE)
+ else:
+ # Whole download (only one piece) is complete
+ log.msg('Piece %d from peer %r is the last piece' % (piece, peer))
+ self.completePieces[piece] = True
+ self.nextFinish = 1
+ self.stream.updateAvailable(2**30)
+
+ self.getPieces()
+
+ def _gotError(self, err, piece, peer):
+ """Piece download failed, try again."""
+        log.msg('Error streaming piece %d from peer %r: %r' % (piece, peer, err))
+ log.err(err)
+ self.completePieces[piece] = False
+ self.getPieces()
class PeerManager:
"""Manage a set of peers and the requests to them.
+ @type cache_dir: L{twisted.python.filepath.FilePath}
+ @ivar cache_dir: the directory to use for storing all files
+ @type dht: L{interfaces.IDHT}
+ @ivar dht: the DHT instance
@type clients: C{dictionary}
@ivar clients: the available peers that have been previously contacted
"""