from urlparse import urlparse, urlunparse
from twisted.internet import reactor, defer
+from twisted.python import log
from twisted.trial import unittest
from twisted.web2 import stream as stream_mod
+from twisted.web2.http import splitHostPort
from HTTPDownloader import HTTPClientManager
@var locations: a list of the locations where the file can be found
"""
url = choice(locations)
+ log.msg('Downloading %s' % url)
parsed = urlparse(url)
- assert(parsed[0] == "http", "Only HTTP is supported, not '%s'" % parsed[0])
- host = parsed[1]
+ assert parsed[0] == "http", "Only HTTP is supported, not '%s'" % parsed[0]
+ host, port = splitHostPort(parsed[0], parsed[1])
path = urlunparse(('', '') + parsed[2:])
-
- # Make sure a port is included for consistency
- if host.find(':') >= 0:
- host, port = host.split(':', 1)
- port = int(port)
- else:
- port = 80
+
return self.getPeer(host, port, path, method, modtime)
def getPeer(self, host, port, path, method="GET", modtime=None):
d.addBoth(lastDefer.callback)
newRequest('www.camrdale.org', "/", 1, 3433)
- newRequest('www.camrdale.org', "/blog/", 2, 37121)
+ newRequest('www.camrdale.org', "/blog/", 2, 39152)
newRequest('www.google.ca', "/", 3, None)
self.pending_calls.append(reactor.callLater(1, newRequest, 'www.sfu.ca', '/', 4, None))
self.pending_calls.append(reactor.callLater(10, newRequest, 'www.camrdale.org', '/wikilink.html', 5, 3084))
- self.pending_calls.append(reactor.callLater(30, newRequest, 'www.camrdale.org', '/sitemap.html', 6, 4750))
+ self.pending_calls.append(reactor.callLater(30, newRequest, 'www.camrdale.org', '/sitemap.html', 6, 4756))
self.pending_calls.append(reactor.callLater(31, newRequest, 'www.sfu.ca', '/studentcentral/index.html', 7, None))
self.pending_calls.append(reactor.callLater(32, newRequest, 'www.camrdale.org', '/openid.html', 8, 2525))
self.pending_calls.append(reactor.callLater(32, newRequest, 'www.camrdale.org', '/subpage.html', 9, 2381))