From 133dec462409d613b3b792d56285fce7abda7521 Mon Sep 17 00:00:00 2001 From: Cameron Dale Date: Fri, 8 Feb 2008 17:30:14 -0800 Subject: [PATCH] Lots of work on the motivating paper, and a new figure. --- docs/motivation/.gitignore | 1 + docs/motivation/all.bib | 77 ++ docs/motivation/apt-dht-motivation.kilepr | 14 +- .../apt_dht_simulation-size_CDF.eps | 698 ++++++++++++++++++ docs/motivation/motivation.tex | 392 +++++++++- 5 files changed, 1155 insertions(+), 27 deletions(-) create mode 100644 docs/motivation/apt_dht_simulation-size_CDF.eps diff --git a/docs/motivation/.gitignore b/docs/motivation/.gitignore index a93d2b2..3809b76 100644 --- a/docs/motivation/.gitignore +++ b/docs/motivation/.gitignore @@ -5,3 +5,4 @@ *.log *.dvi *.backup +*~ diff --git a/docs/motivation/all.bib b/docs/motivation/all.bib index bdadf9e..6bf640a 100644 --- a/docs/motivation/all.bib +++ b/docs/motivation/all.bib @@ -125,6 +125,83 @@ keywords = "imported-10-07-2007 skype" } +@electronic{ kenosis, + title = "The {Kenosis} Website", + url = "http://kenosis.sourceforge.net/", + year = "2008", + key = "kenosis", +} + +@electronic{ debtorrent, + title = "The {DebTorrent} Website", + url = "http://debtorrent.alioth.debian.org/", + year = "2008", + key = "debtorrent", +} + +@electronic{ apttorrent, + title = "The {Apt-Torrent} Website", + url = "http://sianka.free.fr/", + year = "2008", + key = "apttorrent", +} + +@electronic{ azureus-dht, + title = "The {Azureus DHT} refernce", + url = "http://www.azureuswiki.com/index.php/DHT", + year = "2008", + key = "azureus-dht", +} + +@electronic{ bittorrent-dht, + title = "The {BitTorrent DHT} Draft Specification", + url = "http://wiki.theory.org/BitTorrentDraftDHTProtocol", + year = "2008", + key = "bittorrent-dht", +} + +@electronic{ debian, + title = "The {Debian} Project Website", + url = "http://www.debian.org/", + year = "2008", + key = "debian", +} + +@electronic{ fedora, + title = "The {Fedora} Website", + url = "http://fedoraproject.org/", + year = "2008", + key = "fedora", +} + +@electronic{ cpan, + title = "The {CPAN} Website", + url = "http://www.cpan.org/", + year = "2008", + key = "cpan", +} + +@electronic{ cygwin, + title = "The {Cygwin} Website", + url = "http://cygwin.com/", + year = "2008", + key = "cygwin", +} + +@electronic{ gentoo, + title = "The {Gentoo} Website", + url = "http://www.gentoo.org/", + year = "2008", + key = "gentoo", +} + +@electronic{ popcon, + title = "The {Debian Popularity Contest} Website", + url = "http://popcon.debian.org/", + year = "2008", + key = "popcon", +} + %%%%%%% BibSonomy References %%%%%%%%% @book{bollobas01, diff --git a/docs/motivation/apt-dht-motivation.kilepr b/docs/motivation/apt-dht-motivation.kilepr index f2b113f..ee63686 100644 --- a/docs/motivation/apt-dht-motivation.kilepr +++ b/docs/motivation/apt-dht-motivation.kilepr @@ -13,7 +13,15 @@ src_extensions=.tex .ltx .bib .mp [Tools] MakeIndex= -QuickBuild= +QuickBuild=LaTeX+DVItoPDF+ViewPDF + +[item:all.bib] +archive=true +column=19 +encoding=UTF-8 +highlight=BibTeX +line=201 +open=false [item:apt-dht-motivation.kilepr] archive=true @@ -25,8 +33,8 @@ open=false [item:motivation.tex] archive=true -column=38 +column=46 encoding=UTF-8 highlight=LaTeX -line=7 +line=354 open=true diff --git a/docs/motivation/apt_dht_simulation-size_CDF.eps b/docs/motivation/apt_dht_simulation-size_CDF.eps new file mode 100644 index 0000000..0ccb9ff --- /dev/null +++ b/docs/motivation/apt_dht_simulation-size_CDF.eps @@ -0,0 +1,698 @@ +%!PS-Adobe-2.0 EPSF-1.2 +%%Creator: MATLAB, The Mathworks, Inc. Version 7.5.0.338 (R2007b). Operating System: Linux 2.6.18.8-0.7-default #1 SMP Tue Oct 2 17:21:08 UTC 2007 i686. +%%Title: /cs/grad1/camerond/school/matlab/cache/apt_dht_simulation-size_CDF.20080208T171700.eps +%%CreationDate: 02/08/2008 17:17:44 +%%DocumentNeededFonts: Helvetica +%%DocumentProcessColors: Cyan Magenta Yellow Black +%%Extensions: CMYK +%%Pages: 1 +%%BoundingBox: 58 196 550 591 +%%EndComments + +%%BeginProlog +% MathWorks dictionary +/MathWorks 160 dict begin +% definition operators +/bdef {bind def} bind def +/ldef {load def} bind def +/xdef {exch def} bdef +/xstore {exch store} bdef +% operator abbreviations +/c /clip ldef +/cc /concat ldef +/cp /closepath ldef +/gr /grestore ldef +/gs /gsave ldef +/mt /moveto ldef +/np /newpath ldef +/cm /currentmatrix ldef +/sm /setmatrix ldef +/rm /rmoveto ldef +/rl /rlineto ldef +/s {show newpath} bdef +/sc {setcmykcolor} bdef +/sr /setrgbcolor ldef +/sg /setgray ldef +/w /setlinewidth ldef +/j /setlinejoin ldef +/cap /setlinecap ldef +/rc {rectclip} bdef +/rf {rectfill} bdef +% page state control +/pgsv () def +/bpage {/pgsv save def} bdef +/epage {pgsv restore} bdef +/bplot /gsave ldef +/eplot {stroke grestore} bdef +% orientation switch +/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def +% coordinate system mappings +/dpi2point 0 def +% font control +/FontSize 0 def +/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0] + makefont setfont} bdef +/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse + exch dup 3 1 roll findfont dup length dict begin + { 1 index /FID ne {def}{pop pop} ifelse } forall + /Encoding exch def currentdict end definefont pop} bdef +/isroman {findfont /CharStrings get /Agrave known} bdef +/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse + exch FMS} bdef +/csm {1 dpi2point div -1 dpi2point div scale neg translate + dup landscapeMode eq {pop -90 rotate} + {rotateMode eq {90 rotate} if} ifelse} bdef +% line types: solid, dotted, dashed, dotdash +/SO { [] 0 setdash } bdef +/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef +/DA { [6 dpi2point mul] 0 setdash } bdef +/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4 + dpi2point mul] 0 setdash } bdef +% macros for lines and objects +/L {lineto stroke} bdef +/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef +/AP {{rlineto} repeat} bdef +/PDlw -1 def +/W {/PDlw currentlinewidth def setlinewidth} def +/PP {closepath eofill} bdef +/DP {closepath stroke} bdef +/MR {4 -2 roll moveto dup 0 exch rlineto exch 0 rlineto + neg 0 exch rlineto closepath} bdef +/FR {MR stroke} bdef +/PR {MR fill} bdef +/L1i {{currentfile picstr readhexstring pop} image} bdef +/tMatrix matrix def +/MakeOval {newpath tMatrix currentmatrix pop translate scale +0 0 1 0 360 arc tMatrix setmatrix} bdef +/FO {MakeOval stroke} bdef +/PO {MakeOval fill} bdef +/PD {currentlinewidth 2 div 0 360 arc fill + PDlw -1 eq not {PDlw w /PDlw -1 def} if} def +/FA {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef +/PA {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef +/FAn {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef +/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef +/vradius 0 def /hradius 0 def /lry 0 def +/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def +/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef + /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly + vradius add translate hradius vradius scale 0 0 1 180 270 arc + tMatrix setmatrix lrx hradius sub uly vradius add translate + hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix + lrx hradius sub lry vradius sub translate hradius vradius scale + 0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub + translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix + closepath} bdef +/FRR {MRR stroke } bdef +/PRR {MRR fill } bdef +/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix + closepath} bdef +/FlrRR {MlrRR stroke } bdef +/PlrRR {MlrRR fill } bdef +/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix + closepath} bdef +/FtbRR {MtbRR stroke } bdef +/PtbRR {MtbRR fill } bdef +/stri 6 array def /dtri 6 array def +/smat 6 array def /dmat 6 array def +/tmat1 6 array def /tmat2 6 array def /dif 3 array def +/asub {/ind2 exch def /ind1 exch def dup dup + ind1 get exch ind2 get sub exch } bdef +/tri_to_matrix { + 2 0 asub 3 1 asub 4 0 asub 5 1 asub + dup 0 get exch 1 get 7 -1 roll astore } bdef +/compute_transform { + dmat dtri tri_to_matrix tmat1 invertmatrix + smat stri tri_to_matrix tmat2 concatmatrix } bdef +/ds {stri astore pop} bdef +/dt {dtri astore pop} bdef +/db {2 copy /cols xdef /rows xdef mul dup 3 mul string + currentfile exch readhexstring pop + dup 0 3 index getinterval /rbmap xdef + dup 2 index dup getinterval /gbmap xdef + 1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef +/it {gs np dtri aload pop moveto lineto lineto cp c + cols rows 8 compute_transform + rbmap gbmap bbmap true 3 colorimage gr}bdef +/il {newpath moveto lineto stroke}bdef +currentdict end def +%%EndProlog + +%%BeginSetup +MathWorks begin + +0 cap + +end +%%EndSetup + +%%Page: 1 1 +%%BeginPageSetup +%%PageBoundingBox: 58 196 550 591 +MathWorks begin +bpage +%%EndPageSetup + +%%BeginObject: obj1 +bplot + +/dpi2point 12 def +portraitMode 0216 7344 csm + + 480 247 5913 4745 MR c np +76 dict begin %Colortable dictionary +/c0 { 0.000000 0.000000 0.000000 sr} bdef +/c1 { 1.000000 1.000000 1.000000 sr} bdef +/c2 { 0.900000 0.000000 0.000000 sr} bdef +/c3 { 0.000000 0.820000 0.000000 sr} bdef +/c4 { 0.000000 0.000000 0.800000 sr} bdef +/c5 { 0.910000 0.820000 0.320000 sr} bdef +/c6 { 1.000000 0.260000 0.820000 sr} bdef +/c7 { 0.000000 0.820000 0.820000 sr} bdef +c0 +1 j +1 sg + 0 0 6913 5186 PR +6 w +0 4226 5356 0 0 -4226 899 4615 4 MP +PP +-5356 0 0 4226 5356 0 0 -4226 899 4615 5 MP stroke +4 w +DO +0 sg + 899 4615 mt 899 389 L + 899 389 mt 899 389 L +1970 4615 mt 1970 389 L +1970 389 mt 1970 389 L +3041 4615 mt 3041 389 L +3041 389 mt 3041 389 L +4112 4615 mt 4112 389 L +4112 389 mt 4112 389 L +5183 4615 mt 5183 389 L +5183 389 mt 5183 389 L +6255 4615 mt 6255 389 L +6255 389 mt 6255 389 L + 899 4615 mt 6255 4615 L +6255 4615 mt 6255 4615 L + 899 4192 mt 6255 4192 L +6255 4192 mt 6255 4192 L + 899 3769 mt 6255 3769 L +6255 3769 mt 6255 3769 L + 899 3347 mt 6255 3347 L +6255 3347 mt 6255 3347 L + 899 2924 mt 6255 2924 L +6255 2924 mt 6255 2924 L + 899 2502 mt 6255 2502 L +6255 2502 mt 6255 2502 L + 899 2079 mt 6255 2079 L +6255 2079 mt 6255 2079 L + 899 1656 mt 6255 1656 L +6255 1656 mt 6255 1656 L + 899 1234 mt 6255 1234 L +6255 1234 mt 6255 1234 L + 899 811 mt 6255 811 L +6255 811 mt 6255 811 L + 899 389 mt 6255 389 L +6255 389 mt 6255 389 L +SO +6 w + 899 4615 mt 6255 4615 L + 899 389 mt 6255 389 L + 899 4615 mt 899 389 L +6255 4615 mt 6255 389 L + 899 4615 mt 6255 4615 L + 899 4615 mt 899 389 L + 899 4615 mt 899 4588 L + 899 389 mt 899 415 L +DO + 899 4615 mt 899 389 L + 899 389 mt 899 389 L +SO + 899 4615 mt 899 4561 L + 899 389 mt 899 442 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + + 811 4797 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + + 944 4723 mt +(0) s +1221 4615 mt 1221 4588 L +1221 389 mt 1221 415 L +DO +1221 4615 mt 1221 389 L +1221 389 mt 1221 389 L +SO +1410 4615 mt 1410 4588 L +1410 389 mt 1410 415 L +DO +1410 4615 mt 1410 389 L +1410 389 mt 1410 389 L +SO +1543 4615 mt 1543 4588 L +1543 389 mt 1543 415 L +DO +1543 4615 mt 1543 389 L +1543 389 mt 1543 389 L +SO +1647 4615 mt 1647 4588 L +1647 389 mt 1647 415 L +DO +1647 4615 mt 1647 389 L +1647 389 mt 1647 389 L +SO +1732 4615 mt 1732 4588 L +1732 389 mt 1732 415 L +DO +1732 4615 mt 1732 389 L +1732 389 mt 1732 389 L +SO +1804 4615 mt 1804 4588 L +1804 389 mt 1804 415 L +DO +1804 4615 mt 1804 389 L +1804 389 mt 1804 389 L +SO +1866 4615 mt 1866 4588 L +1866 389 mt 1866 415 L +DO +1866 4615 mt 1866 389 L +1866 389 mt 1866 389 L +SO +1921 4615 mt 1921 4588 L +1921 389 mt 1921 415 L +DO +1921 4615 mt 1921 389 L +1921 389 mt 1921 389 L +SO +1970 4615 mt 1970 4588 L +1970 389 mt 1970 415 L +DO +1970 4615 mt 1970 389 L +1970 389 mt 1970 389 L +SO +1970 4615 mt 1970 4561 L +1970 389 mt 1970 442 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +1882 4797 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +2015 4723 mt +(1) s +2292 4615 mt 2292 4588 L +2292 389 mt 2292 415 L +DO +2292 4615 mt 2292 389 L +2292 389 mt 2292 389 L +SO +2481 4615 mt 2481 4588 L +2481 389 mt 2481 415 L +DO +2481 4615 mt 2481 389 L +2481 389 mt 2481 389 L +SO +2615 4615 mt 2615 4588 L +2615 389 mt 2615 415 L +DO +2615 4615 mt 2615 389 L +2615 389 mt 2615 389 L +SO +2718 4615 mt 2718 4588 L +2718 389 mt 2718 415 L +DO +2718 4615 mt 2718 389 L +2718 389 mt 2718 389 L +SO +2803 4615 mt 2803 4588 L +2803 389 mt 2803 415 L +DO +2803 4615 mt 2803 389 L +2803 389 mt 2803 389 L +SO +2875 4615 mt 2875 4588 L +2875 389 mt 2875 415 L +DO +2875 4615 mt 2875 389 L +2875 389 mt 2875 389 L +SO +2937 4615 mt 2937 4588 L +2937 389 mt 2937 415 L +DO +2937 4615 mt 2937 389 L +2937 389 mt 2937 389 L +SO +2992 4615 mt 2992 4588 L +2992 389 mt 2992 415 L +DO +2992 4615 mt 2992 389 L +2992 389 mt 2992 389 L +SO +3041 4615 mt 3041 4588 L +3041 389 mt 3041 415 L +DO +3041 4615 mt 3041 389 L +3041 389 mt 3041 389 L +SO +3041 4615 mt 3041 4561 L +3041 389 mt 3041 442 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +2953 4797 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +3086 4723 mt +(2) s +3363 4615 mt 3363 4588 L +3363 389 mt 3363 415 L +DO +3363 4615 mt 3363 389 L +3363 389 mt 3363 389 L +SO +3552 4615 mt 3552 4588 L +3552 389 mt 3552 415 L +DO +3552 4615 mt 3552 389 L +3552 389 mt 3552 389 L +SO +3686 4615 mt 3686 4588 L +3686 389 mt 3686 415 L +DO +3686 4615 mt 3686 389 L +3686 389 mt 3686 389 L +SO +3790 4615 mt 3790 4588 L +3790 389 mt 3790 415 L +DO +3790 4615 mt 3790 389 L +3790 389 mt 3790 389 L +SO +3874 4615 mt 3874 4588 L +3874 389 mt 3874 415 L +DO +3874 4615 mt 3874 389 L +3874 389 mt 3874 389 L +SO +3946 4615 mt 3946 4588 L +3946 389 mt 3946 415 L +DO +3946 4615 mt 3946 389 L +3946 389 mt 3946 389 L +SO +4008 4615 mt 4008 4588 L +4008 389 mt 4008 415 L +DO +4008 4615 mt 4008 389 L +4008 389 mt 4008 389 L +SO +4063 4615 mt 4063 4588 L +4063 389 mt 4063 415 L +DO +4063 4615 mt 4063 389 L +4063 389 mt 4063 389 L +SO +4112 4615 mt 4112 4588 L +4112 389 mt 4112 415 L +DO +4112 4615 mt 4112 389 L +4112 389 mt 4112 389 L +SO +4112 4615 mt 4112 4561 L +4112 389 mt 4112 442 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +4024 4797 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +4157 4723 mt +(3) s +4435 4615 mt 4435 4588 L +4435 389 mt 4435 415 L +DO +4435 4615 mt 4435 389 L +4435 389 mt 4435 389 L +SO +4623 4615 mt 4623 4588 L +4623 389 mt 4623 415 L +DO +4623 4615 mt 4623 389 L +4623 389 mt 4623 389 L +SO +4757 4615 mt 4757 4588 L +4757 389 mt 4757 415 L +DO +4757 4615 mt 4757 389 L +4757 389 mt 4757 389 L +SO +4861 4615 mt 4861 4588 L +4861 389 mt 4861 415 L +DO +4861 4615 mt 4861 389 L +4861 389 mt 4861 389 L +SO +4946 4615 mt 4946 4588 L +4946 389 mt 4946 415 L +DO +4946 4615 mt 4946 389 L +4946 389 mt 4946 389 L +SO +5017 4615 mt 5017 4588 L +5017 389 mt 5017 415 L +DO +5017 4615 mt 5017 389 L +5017 389 mt 5017 389 L +SO +5079 4615 mt 5079 4588 L +5079 389 mt 5079 415 L +DO +5079 4615 mt 5079 389 L +5079 389 mt 5079 389 L +SO +5134 4615 mt 5134 4588 L +5134 389 mt 5134 415 L +DO +5134 4615 mt 5134 389 L +5134 389 mt 5134 389 L +SO +5183 4615 mt 5183 4588 L +5183 389 mt 5183 415 L +DO +5183 4615 mt 5183 389 L +5183 389 mt 5183 389 L +SO +5183 4615 mt 5183 4561 L +5183 389 mt 5183 442 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +5095 4797 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +5228 4723 mt +(4) s +5506 4615 mt 5506 4588 L +5506 389 mt 5506 415 L +DO +5506 4615 mt 5506 389 L +5506 389 mt 5506 389 L +SO +5694 4615 mt 5694 4588 L +5694 389 mt 5694 415 L +DO +5694 4615 mt 5694 389 L +5694 389 mt 5694 389 L +SO +5828 4615 mt 5828 4588 L +5828 389 mt 5828 415 L +DO +5828 4615 mt 5828 389 L +5828 389 mt 5828 389 L +SO +5932 4615 mt 5932 4588 L +5932 389 mt 5932 415 L +DO +5932 4615 mt 5932 389 L +5932 389 mt 5932 389 L +SO +6017 4615 mt 6017 4588 L +6017 389 mt 6017 415 L +DO +6017 4615 mt 6017 389 L +6017 389 mt 6017 389 L +SO +6089 4615 mt 6089 4588 L +6089 389 mt 6089 415 L +DO +6089 4615 mt 6089 389 L +6089 389 mt 6089 389 L +SO +6151 4615 mt 6151 4588 L +6151 389 mt 6151 415 L +DO +6151 4615 mt 6151 389 L +6151 389 mt 6151 389 L +SO +6205 4615 mt 6205 4588 L +6205 389 mt 6205 415 L +DO +6205 4615 mt 6205 389 L +6205 389 mt 6205 389 L +SO +6255 4615 mt 6255 4588 L +6255 389 mt 6255 415 L +DO +6255 4615 mt 6255 389 L +6255 389 mt 6255 389 L +SO +6255 4615 mt 6255 4561 L +6255 389 mt 6255 442 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +6167 4797 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +6300 4723 mt +(5) s + 899 4615 mt 952 4615 L +6255 4615 mt 6201 4615 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + + 798 4659 mt +(0) s + 899 4192 mt 952 4192 L +6255 4192 mt 6201 4192 L + 698 4236 mt +(0.1) s + 899 3769 mt 952 3769 L +6255 3769 mt 6201 3769 L + 698 3813 mt +(0.2) s + 899 3347 mt 952 3347 L +6255 3347 mt 6201 3347 L + 698 3391 mt +(0.3) s + 899 2924 mt 952 2924 L +6255 2924 mt 6201 2924 L + 698 2968 mt +(0.4) s + 899 2502 mt 952 2502 L +6255 2502 mt 6201 2502 L + 698 2546 mt +(0.5) s + 899 2079 mt 952 2079 L +6255 2079 mt 6201 2079 L + 698 2123 mt +(0.6) s + 899 1656 mt 952 1656 L +6255 1656 mt 6201 1656 L + 698 1700 mt +(0.7) s + 899 1234 mt 952 1234 L +6255 1234 mt 6201 1234 L + 698 1278 mt +(0.8) s + 899 811 mt 952 811 L +6255 811 mt 6201 811 L + 698 855 mt +(0.9) s + 899 389 mt 952 389 L +6255 389 mt 6201 389 L + 798 433 mt +(1) s + 899 4615 mt 6255 4615 L + 899 389 mt 6255 389 L + 899 4615 mt 899 389 L +6255 4615 mt 6255 389 L +gs 899 389 5357 4227 MR c np +322 -20 322 -50 323 -89 322 -157 323 -232 322 -349 323 -431 322 -523 +323 -587 322 -596 323 -580 322 -399 323 -150 322 -44 1221 4605 15 MP stroke +DA +322 -8 322 -49 323 -92 322 -200 323 -224 322 -379 323 -536 322 -679 +323 -671 322 -591 323 -442 322 -262 323 -58 322 -8 1221 4595 15 MP stroke +gr + +DA +3087 4940 mt +(Package Size \(kB\)) s + 616 3117 mt -90 rotate +(Cumulative Distribution) s +90 rotate + 882 4658 mt +( ) s +6239 431 mt +( ) s +SO +1 sg +0 334 1208 0 0 -334 4988 784 4 MP +PP +-1208 0 0 334 1208 0 0 -334 4988 784 5 MP stroke +4 w +DO +SO +6 w +0 sg +4988 784 mt 6196 784 L +4988 450 mt 6196 450 L +4988 784 mt 4988 450 L +6196 784 mt 6196 450 L +4988 784 mt 6196 784 L +4988 784 mt 4988 450 L +4988 784 mt 6196 784 L +4988 450 mt 6196 450 L +4988 784 mt 4988 450 L +6196 784 mt 6196 450 L +5450 585 mt +(By Number) s +gs 4988 450 1209 335 MR c np +355 0 5059 543 2 MP stroke +gr + +5450 734 mt +(By Popularity) s +gs 4988 450 1209 335 MR c np +DA +355 0 5059 690 2 MP stroke +SO +gr + + +end %%Color Dict + +eplot +%%EndObject + +epage +end + +showpage + +%%Trailer +%%EOF diff --git a/docs/motivation/motivation.tex b/docs/motivation/motivation.tex index 37e84d0..142b244 100644 --- a/docs/motivation/motivation.tex +++ b/docs/motivation/motivation.tex @@ -2,24 +2,7 @@ \usepackage[noadjust]{cite} -%\ifCLASSINFOpdf - % \usepackage[pdftex]{graphicx} - % declare the path(s) where your graphic files are - % \graphicspath{{../pdf/}{../jpeg/}} - % and their extensions so you won't have to specify these with - % every instance of \includegraphics - % \DeclareGraphicsExtensions{.pdf,.jpeg,.png} -%\else - % or other class option (dvipsone, dvipdf, if not using dvips). graphicx - % will default to the driver specified in the system graphics.cfg if no - % driver is specified. \usepackage[dvips]{graphicx} - % declare the path(s) where your graphic files are - % \graphicspath{{../eps/}} - % and their extensions so you won't have to specify these with - % every instance of \includegraphics - % \DeclareGraphicsExtensions{.eps} -%\fi \usepackage{url} \usepackage[cmex10]{amsmath} @@ -29,7 +12,7 @@ \begin{document} -\title{Using Altruistic Peers to Supplement Downloading Bandwidth} +\title{Using Altruistic Peers to Help With Free Web Downloads} \author{\IEEEauthorblockN{Cameron Dale} \IEEEauthorblockA{School of Computing Science\\ Simon Fraser University\\ @@ -39,35 +22,396 @@ Email: camerond@cs.sfu.ca}} \maketitle \begin{abstract} +A large amount of free content is available over the Internet from +many different sources. Most of this content uses the traditional +client-server model of requests from users. However, there is an +opportunity to use peer-to-peer techniques to reduce the cost of +much of this distribution, especially due to the altruistic nature +of many of these users. We present a general technique for +satisfying the needs of this P2P distribution, which is applicable +to many of these distributors systems. Our method makes use of a DHT +for storing the location of peers, using the cryptographic hash of +the file as a key. We then go further and implement a solution for +the distribution of Debian software packages. \end{abstract} -%%%%%%%%%%%%%%%%%%%%%%%%%%% Section %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%% Section %%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Introduction} \label{intro} +There are a large number of free content distributors with using +package distribution systems over the Internet to distribute content +to their users. These distributors have developed many different +methods for this distribution, but they mostly use a client-server +model to satisfy user requests. The popularity and number of users +results in a large number of requests, which usually requires a +network of mirrors to handle. Due to the free nature of this +content, many users are willing and able to contribute upload +bandwidth to this distribution, but have no current way to do this. +We present a new peer-to-peer distribution model to meet these +demands. It is based on many previous implementations of successful +peer-to-peer protocols, including distributed hash tables (DHT) and +BitTorrent. The model relies on the pre-existence of cryptographic +hashes of the content, which should uniquely identify it for a +request from other peers. If the peer-to-peer download fails, then +the original request to the server is used as a fallback to prevent +any dissatisfaction from mirrors. The peer can then share this new +content with others. -%%%%%%%%%%%%%%%%%%%%%%%%%%% Section %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +First, we examine the opportunity that is available for many of +these free content distributors. We present an overview of a system +that would efficiently satisfy the demands of a large number of +users, and should substantially reduce the currently substantial +bandwidth requirements of hosting this content. We then present an +example implementation based on the Debian package distribution +system. This implementation will be used by a large number of users, +and serves as an example for other free content distributors of the +opportunity that can be met with such a system. + +%%%%%%%%%%%%%%%%%%%%%%%%%%% Section %%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Related Work} \label{related} +There are many peer-to-peer implementations available today, but +none is very well suited to this specific problem. + +Many distributors make their content available in some form using +BitTorrent \cite{COHEN03}, e.g. for the distribution of CD +images. This is not an ideal situation though, as it requires the +peers to download large amounts of content that they are not +interested in, and prevents them from updating to new content +without downloading another image containing a lot of the same +content they already have. There have also been other attempted +implementations, usually based on some form of BitTorrent, to +address this problem. Unfortunately, BitTorrent is not ideally +suited to an application such as this one, for the following +reasons: +\begin{itemize} + \item The packages are too small and there are too many to create + individual torrents for each one. + \item All the packages together are too large to track efficiently + as a single torrent. + \item BitTorrent's piece sizes are bigger than many of the + packages, which wastes bandwidth downloading parts of other + packages. + \item A small number of the packages can be updated every day, + which may require a new torrent, therefore splitting the + download population. + \item Incentives to share (upload) are no longer needed, as the + content is freely available for anyone to download without + sharing (seeds are also not needed). +\end{itemize} + +Some other implementations have used BitTorrent almost unmodified, +while others have only looked at using DHTs to replace the tracker +in a BitTorrent system. apt-torrent \cite{apttorrent} creates +torrents for some of the larger content available, but it ignores +the smaller content, which is often the more requested content. +DebTorrent \cite{debtorrent} makes widespread modifications to a +traditional BitTorrent client, but also requires some changes to the +distribution system to support it, and separates peers into groups +based on their characteristics which limits the effectiveness of the +sharing. Kenosis \cite{kenosis} is a P2P Remote Procedure Call +client also based on the Kademlia DHT, but it is meant as a P2P +primitive system on which other tools can be built, and so it has no +file sharing functionality at all. Many have used a DHT as a drop-in +replacement for the tracker in a BitTorrent system +\cite{bittorrent-dht, azureus-dht}, but such systems only use the +DHT to find peers for the same torrent, so the file sharing uses +traditional BitTorrent and so is not ideal for the reasons listed +above. + +%%%%%%%%%%%%%%%%%%%%%%%%%%% Section %%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\section{Situation} +\label{situation} + +There are a large number of groups using the Internet to distribute +their free content. This content is usually available from a free +download site, which usually requires a network of mirrors to +support the large number of requests. This content almost always +supports some kind of file verification, usually cryptographic +hashes, to verify the completed downloads accuracy or authenticity. + +\subsection{Examples} +\label{examples} + +Most Linux distributions use a software package management system +that fetches packages to be installed from a network of mirrors. The +Debian project \cite{debian} (and other Debian-based distributions) +uses the \texttt{apt} program, which downloads Debian packages in +the \texttt{.deb} format from one of many mirrors. The program will +first download index files that contain a listing of which packages +are available, as well as important information such as their size, +location, and a hash of their content. Red Hat's Fedora project +\cite{fedora} (and other RPM-based distributions) use the +\texttt{yum} program to obtain RPMs, and Gentoo \cite{gentoo} uses +\texttt{portage} in a similar way. + +Other software vendors also use a similar system. CPAN \cite{cpan} +distrbutes files containing software packages for the PERL +programming language. CYGWIN \cite{cygwin} provides many of the +standard Unix/Linux tools in a Windows environment, also using a +package management tool that requests packages from websites. There +are two software distribution systems for Mac OSX, fink and +MacPorts, that also retrieve packages in this way. + +Also, some systems use direct web downloads, but with a hash +verification file also available for download next to the desired +file. These hash files usually have the same file name, but with an +added extension identifying the hash used (e.g. \texttt{.md5} for +the MD5 hash). + +Finally, there are other programs that use cryptographic hashing to +identify files. Git is a version control system in which all files, +commits, and tags, are identified by their SHA1 hash. These hashes +are used to verify the origin of these items, but are also used when +clients update their local files with new remote information. + +\subsection{Similarities} +\label{similarities} + +The important things to note for each of the systems mentioned in +section \ref{examples}, is that they all have the following in +common: +\begin{itemize} + \item The content is avaliable for anyone to freely download. + \item The content is divided into packages, each of which contains + a small independent part of all the content. + \item No peers are interested in downloading all of the content + available. + \item Hashes of the content and its size are available before the + download is attempted. + \item Requests for the content are sent by a tool, not directly by + the user, though the tool is responding to requests from the user. +\end{itemize} + +We also expect that there are a number of users of these systems +that are motivated by altruism to want to help out with this +distribution. This is common in these systems due to the free nature +of the content being delivered, which encourages some to want to +help out in some way. A number of the systems are also used by +groups that are staffed mostly, or sometimes completely, by +volunteers. This also encourages users to want to \emph{give back} +to the volunteer community that has created the content they are +using. + +\subsection{Differences} +\label{differences} + +Although at first it might seem that a single all-reaching solution +is possible for this situation, there are some differences in each +system that require independent solutions. The systems all use +different tools for their distribution, so any implementation would +have to be specific to the tool it is to integrate with. In +particular, how to obtain requests from the tool or user, and how to +find a hash that corresponds to the file being requested, is very +specific to each system. + +Also, there may be no benefit in creating a single large solution to +integrate all these problems. For one, though they sometimes +distribute nearly identical content (e.g. the same software +available in multiple Linux distributions), it is not exactly +identical and has usually been tailored to the system. The small +differences will make it impossible to distribute similar content +across systems. And, although peer-to-peer systems scale very well +with the number of peers in the system, there is some overhead +involved. So, having a much larger system of peers would mean that +requests could take longer to complete. + +%%%%%%%%%%%%%%%%%%%%%%%%%%% Section %%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\section{Opportunity} +\label{opportunity} + +The situation described in section \ref{situation} presents a clear +opportunity to use some form of peer-to-peer file sharing protocols. +This sparse interest in a large number of packages, and constant +updating, is well suited to the functionality provided by a +Distributed Hash Table (DHT). DHTs require unique keys to store and +retrieve strings of data, for which the cryptographic hashes used by +these package management systems are perfect for. The stored and +retrieved strings can then be pointers to the peers that have the +content package that hashes to that key. A downloading peer can then +lookup the package hash in the DHT, and then, if it is found, +download the file from those peers and verify the content. If the +package hash can not be found in the DHT, the peer will fallback to +downloading from the original content location (i.e. the network of +mirrors), and once complete will add a new entry to the DHT +indicating that it has the package. + +\subsection{Implementation Options} +\label{imp_options} + +There are several ways to implement the desired P2P functionality +into the existing package management software. The functionality can +be directly integrated into the software, though this can be +difficult as the DHT should be running at all times, both for +efficient lookups and to support uploading of already downloaded +content. Many of the package management software implmentations use +HTTP requests to download the files, which makes it possible to +implement the P2P aspect as a standard HTTP caching proxy, which +will get uncached requests first from the P2P system, and then +fallback to the normal HTTP request from a server. For methods that +don't use HTTP requests, other types of proxies may also be +possible. + +\subsection{Downloading From Peers} +\label{downloading} + +Downloading a file efficiently from a number of peers is where +BitTorrent shines as a peer-to-peer application. Its method of +breaking up larger files into sub-pieces, each with its own hash, +makes it very easy to parallelize the downloading process and +maximize the download speed. For very small packages (i.e. less than +the sub-piece size), this parallel downloading is not necessary, or +even desirable. However, this method can still be used in +conjunction with the DHT, for the larger packages that are +available. + +In addition to storing the file download location (which would still +be used for small files), a peer will store a \emph{torrent string} +containing the peer's hashes of the individual pieces for the larger +files. These piece hashes could be compared ahead of time to +determine which peers have the same piece hashes (they all should), +and then used during the download to verify the downloaded pieces. + +For very large files (about 5 or more pieces), the torrent strings +are too long to store in the DHT (a single UDP packet should be less +than 1472 bytes to avoid fragmentation). Instead, the peers will +store the torrent string for large files separately in the DHT, and +only contain a reference to it in their stored value for the hash of +the file. The reference would be a hash of the torrent string. If +the torrent string is short enough to store in the DHT (i.e. less +than 1472 bytes, or about 70 pieces for the SHA1 hash), then a +lookup of that hash in the DHT would give the torrent-like string. +Otherwise, a request to the peer for the hash (just like files are +downloaded), should return the torrent string. + +% This can cause a problem with hash checking the returned data, as +% hashes for the pieces are not known. Any file that fails a hash +% check should be downloaded again, with each piece being downloaded +% from different peers than it was previously. The peers are shifted +% by 1, so that if a peers previously downloaded piece i, it now +% downloads piece i+1, and the first piece is downloaded by the +% previous downloader of the last piece, or preferably a previously +% unused peer. As each piece is downloaded the running hash of the +% file should be checked to determine the place at which the file +% differs from the previous download. +% +% If the hash check then passes, then the peer who originally provided +% the bad piece can be assessed blame for the error. Otherwise, the +% peer who originally provided the piece is probably at fault, since +% he is now providing a later piece. This doesn't work if the +% differing piece is the first piece, in which case it is downloaded +% from a 3rd peer, with consensus revealing the misbehaving peer. + +%%%%%%%%%%%%%%%%%%%%%%%%%%% Section %%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\section{Sample Implementation} +\label{implementation} + +A sample implementation has been created that functions as described +in section \ref{opportunity}. This software, called +\texttt{apt-dht}, interacts with the \texttt{apt} tool found in most +Debian-based Linux distributions. Our tool uses SHA1 hashes to +verify most downloaded files, including the large index files that +contain the hashes of the individual packages. We chose this +distribution system as it is familiar to us, and there are +interesting statistics available for analyzing the popularity of the +software packages \cite{popcon}. + +Since all requests from apt are in the form of HTTP requests to a +server, the implementation takes the form of a caching HTTP proxy. +Pointing a standard apt implementation to use the proxy is as simple +as prepending the proxy location and port to the front of the mirror +name (i.e. ``localhost:9977/mirrorname.debian.org/\ldots''). + +The DHT is based on Khashmir, which is an implementation of the +Kademlia DHT using methods familiar to BitTorrent developers. The +communication is all handled by UDP messages, and RPC requests are +bencoded similar to torrent files. Khashmir uses the high-level +Twisted event-driven networking engine, so Twisted is also used for +all other networking needs. + +The torrent strings stored in the DHT are all bencoded dictionaries +containing similar information to what is in a torrent file. This +includes the piece size used, the length of the piece hash, and of +course the hashes of all the sub-pieces of the content. + +Downloading is accomplished by sending simple HTTP requests to the +peer's identified by lookups in the DHT to have the desired file. +The HTTP server used for the proxy also doubles as the server +listeneing for requests for downloads from other peers. All peers +support HTTP/1.1, both in the server and the client, which allows +for pipelining of multiple requests to a peer, and the requesting of +smaller pieces of a large file using the Range request header. +\begin{figure} +\centering +\includegraphics[width=\columnwidth]{apt_dht_simulation-size_CDF.eps} +\caption{The CDF of the size of packages in a Debian system, both +for the actual size and adjusted size based on the popularity of +the package.} +\label{size_CDF} +\end{figure} -%%%%%%%%%%%%%%%%%%%%%%%%%%% Section %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +Figure \ref{size_CDF} shows the package size of the 22,298 packages +available in Debian in January 2008. We can see that most of the +packages are quite small, and so most will therefore not require +sub-piece information to download. We have chosen a maximum piece +size of 512 kB, which means that 17,515 (78\%) of the packages will +not require this information. There are 3054 packages that will +require 2 to 4 pieces, for which the torrent string can be stored +directly with the package hash in the DHT. There are 1667 packages +that will require a separate lookup in the DHT for the longer +torrent string as they require 5 to 70 pieces. Finally, there are +only 62 packages that require more than 70 pieces, and so will +require a separate request to the user for the torrent string. -\section{Background} -\label{background} +\subsection{Current Status} +\label{status} +Though functional and useful, the current implementation is not +complete, and is missing some of the downloading details necessary +to make it complete. It operates as a caching HTTP proxy, serving +requests from apt and other peers. Index files are identified by the +current implementation, and parsed to find hashed of files for DHT +lookups if they are later requested. Files are downloaded from +peers, and those not available in peers are downloaded correctly +from the mirror. Any downloaded files are hash checked to verify +them, and then added to the DHT. +However, the current implementation does not use the sub-piece +hashing described in section \ref{downloading}, or any parallelizing +of downloads. Instead, it currently chooses a peer at random from +the list of possible peers, and downloads the entire file from that +peer, regardless of the size of the file. This needs to change if +both a) the file is large (more than 512 KB), and b) there are +multiple peers with the file. The storage and retrieval of sub-piece +hashes has not yet been finalized though, and until it is this final +step is not possible. -%%%%%%%%%%%%%%%%%%%%%%%%%%% Section %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%% Section %%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Conclusions} \label{conclusions} +We have designed a generally applicable peer-to-peer content +distribution model to be used by many of the free content +distributors operating today. It makes use of already existing +features of most package management systems, to create a +peer-to-peer distribution that should substantially reduce the costs +of hosting the content. +We have also implemented our design in software to be used in +conjuction with Debian-based distribution of Linux software +packages. It will soone be in use by many users of the Debian +project's distribution, and so serves as an example of the +possibilities that exist. \bibliographystyle{IEEEtran} \bibliography{./IEEEabrv,./all} -- 2.39.5