Moved the files to appropriate package directories.
[quix0rs-apt-p2p.git] / apt_dht / AptPackages.py
1 # Disable the FutureWarning from the apt module
2 import warnings
3 warnings.simplefilter("ignore", FutureWarning)
4
5 import os, shelve
6 from random import choice
7 from shutil import rmtree
8 from copy import deepcopy
9 from UserDict import DictMixin
10
11 from twisted.internet import threads, defer
12 from twisted.python import log
13 from twisted.trial import unittest
14
15 import apt_pkg, apt_inst
16 from apt import OpProgress
17
18 apt_pkg.init()
19
class PackageFileList(DictMixin):
    """Manages a list of package files belonging to a backend.

    Maps the cache path of every tracked index file (Packages, Sources or
    Release) to the location of that file on disk. The mapping is persisted
    in a shelve database stored inside the cache directory.

    @type cache_dir: C{string}
    @ivar cache_dir: the directory holding the persistent database
    @type packages: C{shelve dictionary}
    @ivar packages: the files stored for this backend, or C{None} while the
        database is closed
    """

    # Base names of the index files that are worth tracking.
    TRACKED_FILENAMES = ("Packages", "Release", "Sources")

    def __init__(self, cache_dir):
        """Create the cache directory if needed and open the database.

        @param cache_dir: the directory in which to keep the database
        """
        self.cache_dir = cache_dir
        if not os.path.exists(self.cache_dir):
            os.makedirs(self.cache_dir)
        self.packages = None
        self.open()

    def open(self):
        """Open the persistent dictionary of files in this backend.

        Does nothing if the dictionary is already open.
        """
        if self.packages is None:
            self.packages = shelve.open(self.cache_dir+'/packages.db')

    def close(self):
        """Close the persistent dictionary.

        The reference is dropped afterwards so that closing twice is a
        harmless no-op and a later L{open} really reopens the database.
        """
        if self.packages is not None:
            self.packages.close()
            self.packages = None

    def update_file(self, cache_path, file_path):
        """Check if an updated file needs to be tracked.

        Called from the mirror manager when files get updated so we can update our
        fake lists and sources.list.

        @param cache_path: the path of the file within the mirror
        @param file_path: the location of the file on disk
        @return: C{True} if the file is an index file and was registered,
            C{False} if it was ignored
        """
        filename = cache_path.split('/')[-1]
        if filename in self.TRACKED_FILENAMES:
            log.msg("Registering package file: "+cache_path)
            self.packages[cache_path] = file_path
            return True
        return False

    def check_files(self):
        """Check all files in the database to make sure they exist.

        Entries whose file has disappeared from disk are removed.
        """
        # Iterate over a snapshot of the keys: entries are deleted on the way.
        for f in list(self.packages.keys()):
            if not os.path.exists(self.packages[f]):
                log.msg("File in packages database has been deleted: "+f)
                del self.packages[f]

    # Standard dictionary implementation so this class can be used like a dictionary.
    # DictMixin derives the rest of the mapping API from these four methods.
    def __getitem__(self, key): return self.packages[key]
    def __setitem__(self, key, item): self.packages[key] = item
    def __delitem__(self, key): del self.packages[key]
    def keys(self): return self.packages.keys()
70
class AptPackages:
    """Uses python-apt to answer queries about packages.

    Makes a fake configuration for python-apt for each backend.

    @ivar cache_dir: the directory holding the fake apt tree for this backend
    @ivar apt_config: the apt configuration applied before loading the cache
    @ivar packages: the L{PackageFileList} of index files being tracked
    @ivar loaded: non-zero once the apt cache has been loaded
    @ivar loading: the deferred of a load in progress, or C{None}
    @ivar indexrecords: the hashes read from Release files, keyed first by
        the Release file's cache path, then by index file name, then by hash
        type; each value is a C{(hash, size)} tuple of strings. Only present
        after a load has started.
    """

    DEFAULT_APT_CONFIG = {
        #'APT' : '',
        #'APT::Architecture' : 'i386',  # Commented so the machine's config will set this
        #'APT::Default-Release' : 'unstable',
        'Dir':'.', # /
        'Dir::State' : 'apt/', # var/lib/apt/
        'Dir::State::Lists': 'lists/', # lists/
        #'Dir::State::cdroms' : 'cdroms.list',
        'Dir::State::userstatus' : 'status.user',
        'Dir::State::status': 'dpkg/status', # '/var/lib/dpkg/status'
        # NOTE(review): essential_dirs creates 'apt/cache', not '.apt/cache';
        # confirm whether the leading dot here is intentional.
        'Dir::Cache' : '.apt/cache/', # var/cache/apt/
        #'Dir::Cache::archives' : 'archives/',
        'Dir::Cache::srcpkgcache' : 'srcpkgcache.bin',
        'Dir::Cache::pkgcache' : 'pkgcache.bin',
        'Dir::Etc' : 'apt/etc/', # etc/apt/
        'Dir::Etc::sourcelist' : 'sources.list',
        'Dir::Etc::vendorlist' : 'vendors.list',
        'Dir::Etc::vendorparts' : 'vendors.list.d',
        #'Dir::Etc::main' : 'apt.conf',
        #'Dir::Etc::parts' : 'apt.conf.d',
        #'Dir::Etc::preferences' : 'preferences',
        'Dir::Bin' : '',
        #'Dir::Bin::methods' : '', #'/usr/lib/apt/methods'
        'Dir::Bin::dpkg' : '/usr/bin/dpkg',
        #'DPkg' : '',
        #'DPkg::Pre-Install-Pkgs' : '',
        #'DPkg::Tools' : '',
        #'DPkg::Tools::Options' : '',
        #'DPkg::Tools::Options::/usr/bin/apt-listchanges' : '',
        #'DPkg::Tools::Options::/usr/bin/apt-listchanges::Version' : '2',
        #'DPkg::Post-Invoke' : '',
        }
    # Directories and (empty) files that must exist below the cache directory.
    essential_dirs = ('apt', 'apt/cache', 'apt/dpkg', 'apt/etc', 'apt/lists',
                      'apt/lists/partial')
    essential_files = ('apt/dpkg/status', 'apt/etc/sources.list',)

    def __init__(self, cache_dir):
        """Construct a new packages manager.

        Creates the skeleton of the fake apt tree below the cache directory
        and opens the list of tracked index files.

        @param cache_dir: cache directory from config file
        """
        self.cache_dir = cache_dir
        self.apt_config = deepcopy(self.DEFAULT_APT_CONFIG)

        for dir_name in self.essential_dirs:
            path = os.path.join(self.cache_dir, dir_name)
            if not os.path.exists(path):
                os.makedirs(path)
        for file_name in self.essential_files:
            path = os.path.join(self.cache_dir, file_name)
            if not os.path.exists(path):
                # Create the file empty
                open(path, 'w').close()

        self.apt_config['Dir'] = self.cache_dir
        self.apt_config['Dir::State::status'] = os.path.join(self.cache_dir,
                      self.apt_config['Dir::State'], self.apt_config['Dir::State::status'])
        self.packages = PackageFileList(cache_dir)
        self.loaded = 0
        self.loading = None

    def __del__(self):
        """Release the caches when the object is garbage collected."""
        # __init__ may have failed before these attributes were assigned.
        if hasattr(self, 'loaded') and hasattr(self, 'packages'):
            # cleanup() already closes the package list
            self.cleanup()

    def addRelease(self, cache_path, file_path):
        """Dirty hack until python-apt supports apt-pkg/indexrecords.h
        (see Bug #456141)

        Reads the MD5Sum, SHA1 and SHA256 sections of a Release file and
        stores every C{(hash, size)} pair found in C{self.indexrecords}.

        @param cache_path: the path of the Release file within the mirror,
            used as the key in C{self.indexrecords}
        @param file_path: the location of the Release file on disk
        """
        records = {}
        self.indexrecords[cache_path] = records

        read_packages = False
        hash_type = None
        f = open(file_path, 'r')
        try:
            for line in f:
                line = line.rstrip()

                if line[:1] != " ":
                    # A header line, which also ends any hash section
                    read_packages = False
                    try:
                        # Read the various headers from the file
                        h, v = line.split(":", 1)
                    except ValueError:
                        # Bad header line, just ignore it
                        log.msg("WARNING: Ignoring badly formatted Release line: %s" % line)
                        continue
                    if h in ("MD5Sum", "SHA1", "SHA256"):
                        read_packages = True
                        hash_type = h
                    continue

                # Read file names from the multiple hash sections of the file
                if read_packages:
                    p = line.split()
                    if len(p) < 3:
                        # Expected "hash size name", skip anything shorter
                        log.msg("WARNING: Ignoring badly formatted Release line: %s" % line)
                        continue
                    records.setdefault(p[2], {})[hash_type] = (p[0], p[1])
        finally:
            f.close()

    def file_updated(self, cache_path, file_path):
        """A file in the backend has changed, manage it.

        If this affects us, unload our apt database

        @param cache_path: the path of the file within the mirror
        @param file_path: the location of the file on disk
        """
        if self.packages.update_file(cache_path, file_path):
            self.unload()

    def load(self):
        """Make sure the package is initialized and loaded.

        @return: a deferred firing with the result of L{_load}; callers
            arriving while a load is in progress share the same deferred
        """
        if self.loading is None:
            self.loading = threads.deferToThread(self._load)
            # Run on failure too, otherwise a failed load would leave the
            # failed deferred in place and be handed out forever after.
            self.loading.addBoth(self.doneLoading)
        return self.loading

    def doneLoading(self, loadResult):
        """Cache is loaded (or the load failed), allow a new load to start.

        @param loadResult: the result of L{_load}, or the failure it raised
        @return: C{loadResult} unchanged
        """
        self.loading = None
        # Must pass on the result for the next callback
        return loadResult

    def _load(self):
        """Regenerates the fake configuration and load the packages cache.

        Rebuilds the lists directory as symlinks to the tracked index files,
        rewrites sources.list to match and opens the apt cache on the result.
        Run in a thread by L{load}.

        @return: C{True} if the cache is loaded, C{False} if there are no
            index files to load
        """
        if self.loaded: return True
        apt_pkg.InitSystem()
        lists_dir = os.path.join(self.cache_dir, self.apt_config['Dir::State'],
                                 self.apt_config['Dir::State::Lists'])
        # Start from an empty lists directory
        if os.path.exists(lists_dir):
            rmtree(lists_dir)
        os.makedirs(os.path.join(lists_dir, 'partial'))
        sources_filename = os.path.join(self.cache_dir, self.apt_config['Dir::Etc'],
                                        self.apt_config['Dir::Etc::sourcelist'])
        sources = open(sources_filename, 'w')
        sources_count = 0
        try:
            self.packages.check_files()
            self.indexrecords = {}
            for f in self.packages:
                # we should probably clear old entries from self.packages and
                # take into account the recorded mtime as optimization
                filepath = self.packages[f]
                if f.split('/')[-1] == "Release":
                    self.addRelease(f, filepath)
                fake_uri='http://apt-dht'+f
                fake_dirname = '/'.join(fake_uri.split('/')[:-1])
                if f.endswith('Sources'):
                    source_line='deb-src '+fake_dirname+'/ /'
                else:
                    source_line='deb '+fake_dirname+'/ /'
                listpath = os.path.join(lists_dir, apt_pkg.URItoFileName(fake_uri))
                sources.write(source_line+'\n')
                log.msg("Sources line: " + source_line)
                sources_count = sources_count + 1

                try:
                    #we should empty the directory instead
                    os.unlink(listpath)
                except OSError:
                    # Nothing to replace
                    pass
                os.symlink(filepath, listpath)
        finally:
            # Make sure sources.list is flushed and closed even on error
            sources.close()

        if sources_count == 0:
            log.msg("No Packages files available for %s backend"%(self.cache_dir))
            return False

        log.msg("Loading Packages database for "+self.cache_dir)
        for key, value in self.apt_config.items():
            apt_pkg.Config[key] = value

        self.cache = apt_pkg.GetCache(OpProgress())
        self.records = apt_pkg.GetPkgRecords(self.cache)
        self.srcrecords = apt_pkg.GetPkgSrcRecords()

        self.loaded = 1
        return True

    def unload(self):
        """Tries to make the packages server quit."""
        if self.loaded:
            del self.cache
            del self.records
            del self.srcrecords
            del self.indexrecords
            self.loaded = 0

    def cleanup(self):
        """Cleanup and close any loaded caches."""
        self.unload()
        self.packages.close()

    def findHash(self, path):
        """Find the hash for a given path in this mirror.

        Returns a deferred so it can make sure the cache is loaded first.

        @param path: the path of the file within the mirror
        @return: a deferred firing with a C{(hash, size)} tuple, which is
            C{(None, None)} if the path is unknown; it errbacks if the load
            failed
        """
        d = defer.Deferred()

        deferLoad = self.load()
        deferLoad.addCallbacks(self._findHash, self._findHashFailed,
                               callbackArgs=(path, d), errbackArgs=(d,))

        return d

    def _findHashFailed(self, failure, d):
        """Report a failure on the shared load deferred to a waiting caller.

        Without this the deferred returned by L{findHash} would never fire.

        @param failure: the failure travelling down the load deferred
        @param d: the deferred that was returned to the caller of L{findHash}
        @return: C{failure} unchanged, so other pending callers see it too
        """
        d.errback(failure)
        return failure

    def _findHash(self, loadResult, path, d):
        """Really find the hash for a path.

        Have to pass the returned loadResult on in case other calls to this
        function are pending.

        @param loadResult: the result of L{_load}
        @param path: the path of the file within the mirror
        @param d: the deferred to fire with the C{(hash, size)} tuple
        @return: C{loadResult} unchanged
        """
        if not loadResult:
            d.callback((None, None))
            return loadResult

        # First look for the path in the cache of index files
        for release in self.indexrecords:
            # Directory of the Release file: its path minus "Release"
            prefix = release[:-7]
            if path.startswith(prefix):
                hashes = self.indexrecords[release].get(path[len(prefix):])
                # An entry without a SHA1 hash is skipped: raising here would
                # abort the callbacks of all the other pending lookups.
                if hashes is not None and 'SHA1' in hashes:
                    d.callback(hashes['SHA1'])
                    return loadResult

        # The package name is the file name up to the first underscore
        package = path.split('/')[-1].split('_')[0]

        # Check the binary packages
        try:
            for version in self.cache[package].VersionList:
                size = version.Size
                for verFile in version.FileList:
                    if self.records.Lookup(verFile):
                        if '/' + self.records.FileName == path:
                            d.callback((self.records.SHA1Hash, size))
                            return loadResult
        except KeyError:
            # Not a known binary package
            pass

        # Check the source packages' files
        self.srcrecords.Restart()
        if self.srcrecords.Lookup(package):
            for f in self.srcrecords.Files:
                if path == '/' + f[2]:
                    d.callback((f[0], f[1]))
                    return loadResult

        d.callback((None, None))
        return loadResult
323
class TestAptPackages(unittest.TestCase):
    """Unit tests for the AptPackages cache.

    The tests run against the apt index files found in /var/lib/apt/lists on
    the local machine, and shell out to grep/cut to extract the expected
    values from those files.
    """

    pending_calls = []
    client = None
    packagesFile = ''
    sourcesFile = ''
    releaseFile = ''

    def _run(self, command):
        """Run a shell pipeline and return everything it wrote to stdout."""
        pipe = os.popen(command)
        try:
            return pipe.read()
        finally:
            pipe.close()

    def _lastList(self, suffix):
        """Return the last name printed by C{ls -Sr} that ends in suffix."""
        return self._run('ls -Sr /var/lib/apt/lists/ | grep -E "' + suffix +
                         '$" | tail -n 1').rstrip('\n')

    def _cachePath(self, listFile):
        """Convert the name of an apt list file into a mirror cache path."""
        return listFile[listFile.find('_debian_')+1:].replace('_','/')

    def _indexPath(self, indexFile):
        """Return the cache path of an index file listed in the Release file."""
        # Replace the trailing "Release" with the name of the index file
        return self._cachePath(self.releaseFile)[:-7] + indexFile

    def _releaseHash(self, indexFile):
        """Return the SHA1 hash the Release file lists for an index file."""
        return self._run('grep -A 3000 -E "^SHA1:" ' +
                         '/var/lib/apt/lists/' + self.releaseFile +
                         ' | grep -E " ' + indexFile + '$"' +
                         ' | head -n 1 | cut -d\\  -f 2').rstrip('\n')

    def _binaryField(self, field):
        """Return a field of the dpkg stanza in the Packages file."""
        return self._run('grep -A 30 -E "^Package: dpkg$" ' +
                         '/var/lib/apt/lists/' + self.packagesFile +
                         ' | grep -E "^' + field + ':" | head -n 1' +
                         ' | cut -d\\  -f 2').rstrip('\n')

    def _sourceDir(self):
        """Return the Directory field of the dpkg stanza in the Sources file."""
        return self._run('grep -A 30 -E "^Package: dpkg$" ' +
                         '/var/lib/apt/lists/' + self.sourcesFile +
                         ' | grep -E "^Directory:" | head -n 1' +
                         ' | cut -d\\  -f 2').rstrip('\n')

    def _sourceFiles(self, column):
        """Return one column of the Files section of the dpkg source stanza.

        Column 2 holds the hashes and column 4 the file names.
        """
        return self._run('grep -A 20 -E "^Package: dpkg$" ' +
                         '/var/lib/apt/lists/' + self.sourcesFile +
                         ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                         ' | cut -d\\  -f ' + str(column)).split('\n')[:-1]

    def setUp(self):
        """Create a client and register one Release, Packages and Sources file."""
        self.client = AptPackages('/tmp/.apt-dht')

        self.packagesFile = self._lastList('Packages')
        self.sourcesFile = self._lastList('Sources')
        # Find the Release file that belongs to the chosen Packages file
        for f in os.walk('/var/lib/apt/lists').next()[2]:
            if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
                self.releaseFile = f
                break

        for listFile in (self.releaseFile, self.packagesFile, self.sourcesFile):
            self.client.file_updated(self._cachePath(listFile),
                                     '/var/lib/apt/lists/' + listFile)

    def test_pkg_hash(self):
        """The binary package records give the hash found in the Packages file."""
        self.client._load()

        self.client.records.Lookup(self.client.cache['dpkg'].VersionList[0].FileList[0])

        pkg_hash = self._binaryField('SHA1')

        self.failUnless(self.client.records.SHA1Hash == pkg_hash,
                        "Hashes don't match: %s != %s" % (self.client.records.SHA1Hash, pkg_hash))

    def test_src_hash(self):
        """The source records only give hashes found in the Sources file."""
        self.client._load()

        self.client.srcrecords.Lookup('dpkg')

        src_hashes = self._sourceFiles(2)

        for f in self.client.srcrecords.Files:
            self.failUnless(f[0] in src_hashes, "Couldn't find %s in: %r" % (f[0], src_hashes))

    def test_index_hash(self):
        """The index records give the hash found in the Release file."""
        self.client._load()

        indexhash = self.client.indexrecords[self._cachePath(self.releaseFile)]['main/binary-i386/Packages.bz2']['SHA1'][0]

        idx_hash = self._releaseHash('main/binary-i386/Packages.bz2')

        self.failUnless(indexhash == idx_hash, "Hashes don't match: %s != %s" % (indexhash, idx_hash))

    def verifyHash(self, found_hash, path, true_hash):
        """Callback checking the hash returned by findHash for a path."""
        self.failUnless(found_hash[0] == true_hash,
                    "%s hashes don't match: %s != %s" % (path, found_hash[0], true_hash))

    # The deferred tests below return the lookup deferred itself, so that a
    # failed verification reaches trial instead of leaving the test waiting
    # for a deferred that never fires.

    def test_findIndexHash(self):
        """findHash returns the hash of an index file."""
        idx_hash = self._releaseHash('main/binary-i386/Packages.bz2')
        idx_path = self._indexPath('main/binary-i386/Packages.bz2')

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)
        return d

    def test_findPkgHash(self):
        """findHash returns the hash of a binary package file."""
        pkg_hash = self._binaryField('SHA1')
        # Package paths are matched against '/' + the Filename field
        pkg_path = '/' + self._binaryField('Filename')

        d = self.client.findHash(pkg_path)
        d.addCallback(self.verifyHash, pkg_path, pkg_hash)
        return d

    def test_findSrcHash(self):
        """findHash returns the hash of a randomly chosen source file."""
        src_dir = '/' + self._sourceDir()
        src_hashes = self._sourceFiles(2)
        src_paths = self._sourceFiles(4)

        i = choice(range(len(src_hashes)))
        src_path = src_dir + '/' + src_paths[i]
        d = self.client.findHash(src_path)
        d.addCallback(self.verifyHash, src_path, src_hashes[i])
        return d

    def test_multipleFindHash(self):
        """Several findHash calls made before the cache is loaded all succeed."""
        lookups = []

        idx_hash = self._releaseHash('main/binary-i386/Packages.bz2')
        idx_path = self._indexPath('main/binary-i386/Packages.bz2')

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)
        lookups.append(d)

        pkg_hash = self._binaryField('SHA1')
        pkg_path = '/' + self._binaryField('Filename')

        d = self.client.findHash(pkg_path)
        d.addCallback(self.verifyHash, pkg_path, pkg_hash)
        lookups.append(d)

        src_dir = '/' + self._sourceDir()
        src_hashes = self._sourceFiles(2)
        src_paths = self._sourceFiles(4)

        for i in range(len(src_hashes)):
            src_path = src_dir + '/' + src_paths[i]
            d = self.client.findHash(src_path)
            d.addCallback(self.verifyHash, src_path, src_hashes[i])
            lookups.append(d)

        idx_hash = self._releaseHash('main/source/Sources.bz2')
        idx_path = self._indexPath('main/source/Sources.bz2')

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)
        lookups.append(d)

        # Wait for every lookup, failing as soon as one of them fails
        return defer.gatherResults(lookups)

    def tearDown(self):
        """Cancel any pending delayed calls and close the client."""
        for p in self.pending_calls:
            if p.active():
                p.cancel()
        self.pending_calls = []
        self.client.cleanup()
        self.client = None