From 70269ec5232c8d56870094b77903b6b70a157996 Mon Sep 17 00:00:00 2001 From: Torsten Dreyer Date: Wed, 18 May 2016 12:51:29 +0200 Subject: [PATCH] Much better terrasync.py - tortellini instead of spaghetti code (use oop) - reuse connection --- scripts/python/terrasync.py | 339 ++++++++++++++++++++---------------- 1 file changed, 193 insertions(+), 146 deletions(-) diff --git a/scripts/python/terrasync.py b/scripts/python/terrasync.py index 74b257dd1..ea10e9bc7 100755 --- a/scripts/python/terrasync.py +++ b/scripts/python/terrasync.py @@ -19,27 +19,113 @@ # # terrasync.py - synchronize terrascenery data to your local disk # needs dnspython (pip install dnspython) -# -import os -import hashlib -import urllib.request +import urllib, os, hashlib +from urllib.parse import urlparse +from http.client import HTTPConnection, _CS_IDLE from os import listdir from os.path import isfile, join -dirindex = ".dirindex" -DIRINDEXVERSION = 1 - -URL="http://flightgear.sourceforge.net/scenery" -# User master repository for now -#URL="automatic" -TARGET="." -QUICK=False -REMOVE_ORPHAN=False - -######################################################################## - -def fn_hash_of_file(fname): +################################################################################################################################# +class HTTPGetCallback: + def __init__(self, src, callback): + self.callback = callback + self.src = src + self.result = None + +class HTTPGetter: + def __init__(self, baseUrl, maxPending=10): + self.baseUrl = baseUrl + self.parsedBaseUrl = urlparse(baseUrl) + self.maxPending = maxPending + self.requests = [] + self.pendingRequests = [] + self.httpConnection = HTTPConnection(self.parsedBaseUrl.netloc,80, True) + self.httpRequestHeaders = headers = {'Host':self.parsedBaseUrl.netloc,'Content-Length':0,'Connection':'Keep-Alive'} + + def get(self, httpGetCallback): + + #self.requests.append(httpGetCallback) + conn = self.httpConnection + request = httpGetCallback + conn.request("GET", self.parsedBaseUrl.path + request.src, None, self.httpRequestHeaders) + try: + httpGetCallback.result = conn.getresponse() + except: + # try to reconnect once + #print("reconnect") + conn.close() + conn.connect() + conn.request("GET", self.parsedBaseUrl.path + request.src, None, self.httpRequestHeaders) + httpGetCallback.result = conn.getresponse() + + httpGetCallback.callback() + #self.requests.remove(httpGetCallback) + +################################################################################################################################# +class DirIndex: + + def __init__(self, dirIndexFile): + self.d = [] + self.f = [] + self.version = 0 + self.path = "" + + with open(dirIndexFile) as f: + self.readFrom(f) + + def readFrom(self, readable): + for line in readable: + line = line.strip() + if line.startswith('#'): + continue + + tokens = line.split(':') + if len(tokens) == 0: + continue + + if tokens[0] == "version": + self.version = int(tokens[1]) + + elif tokens[0] == "path": + self.path = tokens[1] + + elif tokens[0] == "d": + self.d.append({ 'name': tokens[1], 'hash': tokens[2] }) + + elif tokens[0] == "f": + self.f.append({ 'name': tokens[1], 'hash': tokens[2], 'size': tokens[3] }) + + def getVersion(self): + return self.version + + def getPath(self): + return self.path + + def getDirectories(self): + return self.d + + def getFiles(self): + return self.f + +################################################################################################################################# +class HTTPDownloadRequest(HTTPGetCallback): + def __init__(self, terrasync, src, dst, callback = None ): + super().__init__(src, self.callback) + self.terrasync = terrasync + self.dst = dst + self.mycallback = callback + + def callback(self): + with open(self.dst, 'wb') as f: + f.write(self.result.read()) + + if self.mycallback != None: + self.mycallback(self) + +################################################################################################################################# + +def hash_of_file(fname): if not os.path.exists( fname ): return None @@ -53,150 +139,111 @@ def fn_hash_of_file(fname): return hash.hexdigest() -######################################################################## -def do_download_file( _url, _path, _localfile, _hash, _force ): - if os.path.exists( _localfile ) and not _force: - h = fn_hash_of_file(_localfile) - if h == _hash: - #print("hash match for ", _localfile) - return False - - r = urllib.request.urlopen( _url + _path ) - with open(_localfile, 'wb') as f: - f.write( r.read() ) - #print("downloaded ", _localfile, " from ", _url + _path ) - return True - -######################################################################## -def do_terrasync( _url, _path, _localdir, _dirIndexHash ): - url = _url + _path - print(url) - - if not os.path.exists( _localdir ): - os.makedirs( _localdir ) - - # download and process .dirindex as temporary file - # rename to .dirindex after successful processing of directory - # in case of abort, .dirindex.tmp will be removed as orphan - myDirIndexFile = os.path.join(_localdir, ".dirindex.tmp") - - try: - if not do_download_file( url, "/.dirindex", myDirIndexFile, _dirIndexHash, QUICK == False ): - # dirindex hash matches, file not downloaded, skip directory - return - - except urllib.error.HTTPError as err: - if err.code == 404 and _path == "": - # HACK: only the master on SF provides .dirindex for root, fake it if it's missing - print("Using static root hack.") - for _sub in ("Models", "Terrain", "Objects", "Airports" ): - do_terrasync( _url, "/" + _sub, os.path.join(_localdir,_sub), None ) - return - - else: - raise - - with open(myDirIndexFile, 'r') as myDirIndex: - serverFiles = [] - for line in myDirIndex: - tokens = line.rstrip().split(':') - if( len(tokens) == 0 ): - continue - - # TODO: check version number, should be equal to DIRINDEXVERSION - # otherwise complain and terminate - if( tokens[0] == "version" ): - continue - - if( tokens[0] == "path" ): - continue - - if( tokens[0] == "d" ): - do_terrasync( url, "/" + tokens[1], os.path.join(_localdir,tokens[1]), tokens[2] ) - - if( tokens[0] == "f" ): - do_download_file( url, "/" + tokens[1], os.path.join(_localdir,tokens[1]), tokens[2], False ) - serverFiles.append( tokens[1] ) - - os.rename( myDirIndexFile, os.path.join(_localdir, ".dirindex" ) ) - - localFiles = [f for f in listdir(_localdir) if isfile(join(_localdir, f))] - for f in localFiles: - if f != ".dirindex" and not f in serverFiles: - if REMOVE_ORPHAN: - os.remove( os.path.join(_localdir,f) ) - - #TODO: cleanup orphan files - -######################################################################## - -import getopt, sys, random, re +################################################################################################################################# +class TerraSync: -try: - opts, args = getopt.getopt(sys.argv[1:], "u:t:qr", [ "url=", "target=", "quick", "remove-orphan" ]) + def __init__(self, url="http://flightgear.sourceforge.net/scenery", target=".", quick=False, removeOrphan=False): + self.setUrl(url).setTarget(target) + self.quick = quick + self.removeOrphan = removeOrphan + self.httpGetter = None -except getopt.GetoptError: - print("terrasync.py [--url=http://some.server.org/scenery] [--target=/some/path] [-q|--quick] [-r|--remove-orphan]") - sys.exit(2) + def setUrl(self, url): + self.url = url.rstrip('/').strip() + return self -for opt, arg in opts: - if opt in( "-u", "--url"): - URL = arg + def setTarget(self, target): + self.target = target.rstrip('/').strip() + return self - elif opt in ( "-t", "--target"): - TARGET= arg + def start(self): + self.httpGetter = HTTPGetter(self.url) + self.updateDirectory("", "", None ) - elif opt in ("-q", "--quick"): - QUICK = True + def updateFile(self, serverPath, localPath, fileHash ): + localFullPath = join(self.target, localPath) + if fileHash != None and hash_of_file(localFullPath) == fileHash: + #print("hash of file matches, not downloading") + return + + print("downloading ", serverPath ) + + request = HTTPDownloadRequest(self, serverPath, localFullPath ) + self.httpGetter.get(request) - elif opt in ("-r", "--remove-orphan"): - REMOVE_ORPHAN = True -# automatic URL lookup from DNS NAPTR -# - lookup terrasync.flightgear.org, type=NAPTR, service="ws20", flags="U" -# - sort by order,preference ascending -# - pick entries with lowest order and preference -# - randomly pick one of those -# - use regexp fields URL -if URL == "automatic": - import dns.resolver - dnsResolver = dns.resolver.Resolver() + def updateDirectory(self, serverPath, localPath, dirIndexHash): + print("processing ", serverPath) - order = -1 - preference = -1 + localFullPath = join(self.target, localPath) + if not os.path.exists( localFullPath ): + os.makedirs( localFullPath ) - # find lowes preference/order for service 'ws20' and flags 'U' - dnsAnswer = dnsResolver.query("terrasync.flightgear.org", "NAPTR" ) - for naptr in dnsAnswer: - if naptr.service != b'ws20' or naptr.flags != b'U': - continue + localDirIndex = join(localFullPath, ".dirindex") + if dirIndexHash != None and hash_of_file(localDirIndex) == dirIndexHash: + # print("hash of dirindex matches, not downloading") + if not self.quick: + self.handleDirindexFile( localDirIndex ) + else: + request = HTTPDownloadRequest(self, serverPath + "/.dirindex", localDirIndex, self.handleDirindexRequest ) + self.httpGetter.get(request) - if order == -1 or naptr.order < order: - order = naptr.order - preference = naptr.preference + def handleDirindexRequest(self, dirindexRequest): + self.handleDirindexFile(dirindexRequest.dst) - if order == naptr.order: - if naptr.preference < preference: - preference = naptr.preference + def handleDirindexFile(self, dirindexFile): + dirIndex = DirIndex(dirindexFile) + serverFiles = [] + for file in dirIndex.getFiles(): + f = file['name'] + h = file['hash'] + self.updateFile( "/" + dirIndex.getPath() + "/" + f, join(dirIndex.getPath(),f), h ) + serverFiles.append(f) - # grab candidats - candidates = [] - for naptr in dnsAnswer: - if naptr.service != b'ws20' or naptr.flags != b'U' or naptr.preference != preference or naptr.order != order: - continue + for subdir in dirIndex.getDirectories(): + d = subdir['name'] + h = subdir['hash'] + self.updateDirectory( "/" + dirIndex.getPath() + "/" + d, join(dirIndex.getPath(),d), h ) - candidates.append( naptr.regexp.decode('utf-8') ) + if self.removeOrphan: + localFullPath = join(self.target, dirIndex.getPath()) + localFiles = [f for f in listdir(localFullPath) if isfile(join(localFullPath, f))] + for f in localFiles: + if f != ".dirindex" and not f in serverFiles: + #print("removing orphan", join(localFullPath,f) ) + os.remove( join(localFullPath,f) ) - if not candidates: - print("sorry, no terrascenery URLs found. You may specify one with --url=http://some.url.org/foo") - sys.exit(3) - _url = random.choice(candidates) - _subst = _url.split(_url[0]) # split string, first character is separator regexreplacement - URL = re.sub(_subst[1], _subst[2], "" ) # apply regex substitude on empty string + def isReady(self): + return self.httpGetter and self.httpGetter.isReady() + return False -print( "terrasyncing from ", URL, "to ", TARGET ) -do_terrasync( URL, "", TARGET, None ) + def update(self): + if self.httpGetter: + self.httpGetter.update() + +################################################################################################################################# +import getopt, sys +try: + opts, args = getopt.getopt(sys.argv[1:], "u:t:qr", [ "url=", "target=", "quick", "remove-orphan" ]) + +except getopt.GetoptError: + print("terrasync.py [--url=http://some.server.org/scenery] [--target=/some/path] [-q|--quick] [-r|--remove-orphan]") + sys.exit(2) + +terraSync = TerraSync() +for opt, arg in opts: + if opt in("-u", "--url"): + terraSync.url = arg + + elif opt in ("-t", "--target"): + terraSync.target = arg + + elif opt in ("-q", "--quick"): + terraSync.quick = True + + elif opt in ("-r", "--remove-orphan"): + terraSync.removeOrphan = True -######################################################################## +terraSync.start() -- 2.39.5