blob: 3186ac0a7ae4ec4737d57a45c700d7d238daac4e [file] [log] [blame]
Junio C Hamanof2b74942012-11-20 21:06:261<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
2 "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
3<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
4<head>
Junio C Hamano9d971152012-12-19 00:43:115<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
Junio C Hamanobc8d4782014-01-13 23:35:506<meta name="generator" content="AsciiDoc 8.6.6" />
7<title>Concerning Git&#8217;s Packing Heuristics</title>
Junio C Hamanof2b74942012-11-20 21:06:268<style type="text/css">
Junio C Hamano9d971152012-12-19 00:43:119/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
10
11/* Default font. */
12body {
13 font-family: Georgia,serif;
14}
15
16/* Title font. */
17h1, h2, h3, h4, h5, h6,
18div.title, caption.title,
19thead, p.table.header,
20#toctitle,
21#author, #revnumber, #revdate, #revremark,
22#footer {
23 font-family: Arial,Helvetica,sans-serif;
Junio C Hamanof2b74942012-11-20 21:06:2624}
25
26body {
27 margin: 1em 5% 1em 5%;
28}
29
30a {
31 color: blue;
32 text-decoration: underline;
33}
34a:visited {
35 color: fuchsia;
36}
37
38em {
39 font-style: italic;
40 color: navy;
41}
42
43strong {
44 font-weight: bold;
45 color: #083194;
46}
47
Junio C Hamanof2b74942012-11-20 21:06:2648h1, h2, h3, h4, h5, h6 {
49 color: #527bbd;
Junio C Hamanof2b74942012-11-20 21:06:2650 margin-top: 1.2em;
51 margin-bottom: 0.5em;
52 line-height: 1.3;
53}
54
55h1, h2, h3 {
56 border-bottom: 2px solid silver;
57}
58h2 {
59 padding-top: 0.5em;
60}
61h3 {
62 float: left;
63}
64h3 + * {
65 clear: left;
66}
Junio C Hamano9d971152012-12-19 00:43:1167h5 {
68 font-size: 1.0em;
69}
Junio C Hamanof2b74942012-11-20 21:06:2670
71div.sectionbody {
Junio C Hamanof2b74942012-11-20 21:06:2672 margin-left: 0;
73}
74
75hr {
76 border: 1px solid silver;
77}
78
79p {
80 margin-top: 0.5em;
81 margin-bottom: 0.5em;
82}
83
84ul, ol, li > p {
85 margin-top: 0;
86}
Junio C Hamano9d971152012-12-19 00:43:1187ul > li { color: #aaa; }
88ul > li > * { color: black; }
Junio C Hamanof2b74942012-11-20 21:06:2689
Junio C Hamanobc8d4782014-01-13 23:35:5090pre {
Junio C Hamanof2b74942012-11-20 21:06:2691 padding: 0;
92 margin: 0;
93}
94
Junio C Hamano9d971152012-12-19 00:43:1195#author {
Junio C Hamanof2b74942012-11-20 21:06:2696 color: #527bbd;
Junio C Hamanof2b74942012-11-20 21:06:2697 font-weight: bold;
98 font-size: 1.1em;
99}
Junio C Hamano9d971152012-12-19 00:43:11100#email {
Junio C Hamanof2b74942012-11-20 21:06:26101}
Junio C Hamano9d971152012-12-19 00:43:11102#revnumber, #revdate, #revremark {
Junio C Hamanof2b74942012-11-20 21:06:26103}
104
Junio C Hamano9d971152012-12-19 00:43:11105#footer {
Junio C Hamanof2b74942012-11-20 21:06:26106 font-size: small;
107 border-top: 2px solid silver;
108 padding-top: 0.5em;
109 margin-top: 4.0em;
110}
Junio C Hamano9d971152012-12-19 00:43:11111#footer-text {
Junio C Hamanof2b74942012-11-20 21:06:26112 float: left;
113 padding-bottom: 0.5em;
114}
Junio C Hamano9d971152012-12-19 00:43:11115#footer-badges {
Junio C Hamanof2b74942012-11-20 21:06:26116 float: right;
117 padding-bottom: 0.5em;
118}
119
Junio C Hamano9d971152012-12-19 00:43:11120#preamble {
Junio C Hamanof2b74942012-11-20 21:06:26121 margin-top: 1.5em;
122 margin-bottom: 1.5em;
123}
Junio C Hamano9d971152012-12-19 00:43:11124div.imageblock, div.exampleblock, div.verseblock,
Junio C Hamanof2b74942012-11-20 21:06:26125div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
126div.admonitionblock {
127 margin-top: 1.0em;
128 margin-bottom: 1.5em;
129}
130div.admonitionblock {
131 margin-top: 2.0em;
132 margin-bottom: 2.0em;
133 margin-right: 10%;
134 color: #606060;
135}
136
137div.content { /* Block element content. */
138 padding: 0;
139}
140
141/* Block element titles. */
142div.title, caption.title {
143 color: #527bbd;
Junio C Hamanof2b74942012-11-20 21:06:26144 font-weight: bold;
145 text-align: left;
146 margin-top: 1.0em;
147 margin-bottom: 0.5em;
148}
149div.title + * {
150 margin-top: 0;
151}
152
153td div.title:first-child {
154 margin-top: 0.0em;
155}
156div.content div.title:first-child {
157 margin-top: 0.0em;
158}
159div.content + div.title {
160 margin-top: 0.0em;
161}
162
163div.sidebarblock > div.content {
164 background: #ffffee;
Junio C Hamano9d971152012-12-19 00:43:11165 border: 1px solid #dddddd;
166 border-left: 4px solid #f0f0f0;
Junio C Hamanof2b74942012-11-20 21:06:26167 padding: 0.5em;
168}
169
170div.listingblock > div.content {
Junio C Hamano9d971152012-12-19 00:43:11171 border: 1px solid #dddddd;
172 border-left: 5px solid #f0f0f0;
173 background: #f8f8f8;
Junio C Hamanof2b74942012-11-20 21:06:26174 padding: 0.5em;
175}
176
177div.quoteblock, div.verseblock {
178 padding-left: 1.0em;
179 margin-left: 1.0em;
180 margin-right: 10%;
Junio C Hamano9d971152012-12-19 00:43:11181 border-left: 5px solid #f0f0f0;
182 color: #888;
Junio C Hamanof2b74942012-11-20 21:06:26183}
184
185div.quoteblock > div.attribution {
186 padding-top: 0.5em;
187 text-align: right;
188}
189
Junio C Hamano9d971152012-12-19 00:43:11190div.verseblock > pre.content {
191 font-family: inherit;
192 font-size: inherit;
Junio C Hamanof2b74942012-11-20 21:06:26193}
194div.verseblock > div.attribution {
195 padding-top: 0.75em;
196 text-align: left;
197}
198/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
199div.verseblock + div.attribution {
200 text-align: left;
201}
202
203div.admonitionblock .icon {
204 vertical-align: top;
205 font-size: 1.1em;
206 font-weight: bold;
207 text-decoration: underline;
208 color: #527bbd;
209 padding-right: 0.5em;
210}
211div.admonitionblock td.content {
212 padding-left: 0.5em;
213 border-left: 3px solid #dddddd;
214}
215
216div.exampleblock > div.content {
217 border-left: 3px solid #dddddd;
218 padding-left: 0.5em;
219}
220
221div.imageblock div.content { padding-left: 0; }
222span.image img { border-style: none; }
223a.image:visited { color: white; }
224
225dl {
226 margin-top: 0.8em;
227 margin-bottom: 0.8em;
228}
229dt {
230 margin-top: 0.5em;
231 margin-bottom: 0;
232 font-style: normal;
233 color: navy;
234}
235dd > *:first-child {
236 margin-top: 0.1em;
237}
238
239ul, ol {
240 list-style-position: outside;
241}
242ol.arabic {
243 list-style-type: decimal;
244}
245ol.loweralpha {
246 list-style-type: lower-alpha;
247}
248ol.upperalpha {
249 list-style-type: upper-alpha;
250}
251ol.lowerroman {
252 list-style-type: lower-roman;
253}
254ol.upperroman {
255 list-style-type: upper-roman;
256}
257
258div.compact ul, div.compact ol,
259div.compact p, div.compact p,
260div.compact div, div.compact div {
261 margin-top: 0.1em;
262 margin-bottom: 0.1em;
263}
264
Junio C Hamanof2b74942012-11-20 21:06:26265tfoot {
266 font-weight: bold;
267}
268td > div.verse {
269 white-space: pre;
270}
Junio C Hamanof2b74942012-11-20 21:06:26271
272div.hdlist {
273 margin-top: 0.8em;
274 margin-bottom: 0.8em;
275}
276div.hdlist tr {
277 padding-bottom: 15px;
278}
279dt.hdlist1.strong, td.hdlist1.strong {
280 font-weight: bold;
281}
282td.hdlist1 {
283 vertical-align: top;
284 font-style: normal;
285 padding-right: 0.8em;
286 color: navy;
287}
288td.hdlist2 {
289 vertical-align: top;
290}
291div.hdlist.compact tr {
292 margin: 0;
293 padding-bottom: 0;
294}
295
296.comment {
297 background: yellow;
298}
299
300.footnote, .footnoteref {
301 font-size: 0.8em;
302}
303
304span.footnote, span.footnoteref {
305 vertical-align: super;
306}
307
308#footnotes {
309 margin: 20px 0 20px 0;
310 padding: 7px 0 0 0;
311}
312
313#footnotes div.footnote {
314 margin: 0 0 5px 0;
315}
316
317#footnotes hr {
318 border: none;
319 border-top: 1px solid silver;
320 height: 1px;
321 text-align: left;
322 margin-left: 0;
323 width: 20%;
324 min-width: 100px;
325}
326
Junio C Hamano9d971152012-12-19 00:43:11327div.colist td {
328 padding-right: 0.5em;
329 padding-bottom: 0.3em;
330 vertical-align: top;
331}
332div.colist td img {
333 margin-top: 0.3em;
Junio C Hamanof2b74942012-11-20 21:06:26334}
335
Junio C Hamano9d971152012-12-19 00:43:11336@media print {
337 #footer-badges { display: none; }
338}
339
340#toc {
Junio C Hamanof2b74942012-11-20 21:06:26341 margin-bottom: 2.5em;
342}
343
Junio C Hamano9d971152012-12-19 00:43:11344#toctitle {
Junio C Hamanof2b74942012-11-20 21:06:26345 color: #527bbd;
Junio C Hamanof2b74942012-11-20 21:06:26346 font-size: 1.1em;
347 font-weight: bold;
348 margin-top: 1.0em;
349 margin-bottom: 0.1em;
350}
351
Junio C Hamanobc8d4782014-01-13 23:35:50352div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
Junio C Hamanof2b74942012-11-20 21:06:26353 margin-top: 0;
354 margin-bottom: 0;
355}
356div.toclevel2 {
357 margin-left: 2em;
358 font-size: 0.9em;
359}
360div.toclevel3 {
361 margin-left: 4em;
362 font-size: 0.9em;
363}
364div.toclevel4 {
365 margin-left: 6em;
366 font-size: 0.9em;
367}
Junio C Hamanof2b74942012-11-20 21:06:26368
Junio C Hamano9d971152012-12-19 00:43:11369span.aqua { color: aqua; }
370span.black { color: black; }
371span.blue { color: blue; }
372span.fuchsia { color: fuchsia; }
373span.gray { color: gray; }
374span.green { color: green; }
375span.lime { color: lime; }
376span.maroon { color: maroon; }
377span.navy { color: navy; }
378span.olive { color: olive; }
379span.purple { color: purple; }
380span.red { color: red; }
381span.silver { color: silver; }
382span.teal { color: teal; }
383span.white { color: white; }
384span.yellow { color: yellow; }
385
386span.aqua-background { background: aqua; }
387span.black-background { background: black; }
388span.blue-background { background: blue; }
389span.fuchsia-background { background: fuchsia; }
390span.gray-background { background: gray; }
391span.green-background { background: green; }
392span.lime-background { background: lime; }
393span.maroon-background { background: maroon; }
394span.navy-background { background: navy; }
395span.olive-background { background: olive; }
396span.purple-background { background: purple; }
397span.red-background { background: red; }
398span.silver-background { background: silver; }
399span.teal-background { background: teal; }
400span.white-background { background: white; }
401span.yellow-background { background: yellow; }
402
403span.big { font-size: 2em; }
404span.small { font-size: 0.6em; }
405
406span.underline { text-decoration: underline; }
407span.overline { text-decoration: overline; }
408span.line-through { text-decoration: line-through; }
409
Junio C Hamano9d971152012-12-19 00:43:11410
411/*
412 * xhtml11 specific
413 *
414 * */
415
Junio C Hamanobc8d4782014-01-13 23:35:50416tt {
417 font-family: monospace;
418 font-size: inherit;
419 color: navy;
420}
421
Junio C Hamano9d971152012-12-19 00:43:11422div.tableblock {
423 margin-top: 1.0em;
424 margin-bottom: 1.5em;
Junio C Hamanof2b74942012-11-20 21:06:26425}
Junio C Hamano9d971152012-12-19 00:43:11426div.tableblock > table {
427 border: 3px solid #527bbd;
428}
429thead, p.table.header {
Junio C Hamanof2b74942012-11-20 21:06:26430 font-weight: bold;
Junio C Hamano9d971152012-12-19 00:43:11431 color: #527bbd;
432}
433p.table {
434 margin-top: 0;
435}
436/* Because the table frame attribute is overriden by CSS in most browsers. */
437div.tableblock > table[frame="void"] {
438 border-style: none;
439}
440div.tableblock > table[frame="hsides"] {
441 border-left-style: none;
442 border-right-style: none;
443}
444div.tableblock > table[frame="vsides"] {
445 border-top-style: none;
446 border-bottom-style: none;
Junio C Hamanof2b74942012-11-20 21:06:26447}
448
Junio C Hamano9d971152012-12-19 00:43:11449
450/*
451 * html5 specific
452 *
453 * */
454
Junio C Hamanobc8d4782014-01-13 23:35:50455.monospaced {
456 font-family: monospace;
457 font-size: inherit;
458 color: navy;
459}
460
Junio C Hamano9d971152012-12-19 00:43:11461table.tableblock {
462 margin-top: 1.0em;
463 margin-bottom: 1.5em;
464}
465thead, p.tableblock.header {
466 font-weight: bold;
467 color: #527bbd;
468}
469p.tableblock {
470 margin-top: 0;
471}
472table.tableblock {
473 border-width: 3px;
474 border-spacing: 0px;
475 border-style: solid;
476 border-color: #527bbd;
477 border-collapse: collapse;
478}
479th.tableblock, td.tableblock {
480 border-width: 1px;
481 padding: 4px;
482 border-style: solid;
483 border-color: #527bbd;
Junio C Hamanof2b74942012-11-20 21:06:26484}
485
Junio C Hamano9d971152012-12-19 00:43:11486table.tableblock.frame-topbot {
487 border-left-style: hidden;
488 border-right-style: hidden;
489}
490table.tableblock.frame-sides {
491 border-top-style: hidden;
492 border-bottom-style: hidden;
493}
494table.tableblock.frame-none {
495 border-style: hidden;
496}
497
498th.tableblock.halign-left, td.tableblock.halign-left {
499 text-align: left;
500}
501th.tableblock.halign-center, td.tableblock.halign-center {
502 text-align: center;
503}
504th.tableblock.halign-right, td.tableblock.halign-right {
Junio C Hamanof2b74942012-11-20 21:06:26505 text-align: right;
506}
507
Junio C Hamano9d971152012-12-19 00:43:11508th.tableblock.valign-top, td.tableblock.valign-top {
509 vertical-align: top;
Junio C Hamanof2b74942012-11-20 21:06:26510}
Junio C Hamano9d971152012-12-19 00:43:11511th.tableblock.valign-middle, td.tableblock.valign-middle {
512 vertical-align: middle;
513}
514th.tableblock.valign-bottom, td.tableblock.valign-bottom {
515 vertical-align: bottom;
Junio C Hamanof2b74942012-11-20 21:06:26516}
517
Junio C Hamano9d971152012-12-19 00:43:11518
519/*
520 * manpage specific
521 *
522 * */
523
524body.manpage h1 {
525 padding-top: 0.5em;
526 padding-bottom: 0.5em;
527 border-top: 2px solid silver;
528 border-bottom: 2px solid silver;
529}
530body.manpage h2 {
531 border-style: none;
532}
533body.manpage div.sectionbody {
534 margin-left: 3em;
Junio C Hamanof2b74942012-11-20 21:06:26535}
536
Junio C Hamano9d971152012-12-19 00:43:11537@media print {
538 body.manpage div#toc { display: none; }
539}
Junio C Hamanof2b74942012-11-20 21:06:26540</style>
541<script type="text/javascript">
542/*<![CDATA[*/
Junio C Hamanof2b74942012-11-20 21:06:26543var asciidoc = { // Namespace.
544
545/////////////////////////////////////////////////////////////////////
546// Table Of Contents generator
547/////////////////////////////////////////////////////////////////////
548
549/* Author: Mihai Bazon, September 2002
550 * http://students.infoiasi.ro/~mishoo
551 *
552 * Table Of Content generator
553 * Version: 0.4
554 *
555 * Feel free to use this script under the terms of the GNU General Public
556 * License, as long as you do not remove or alter this notice.
557 */
558
559 /* modified by Troy D. Hanson, September 2006. License: GPL */
560 /* modified by Stuart Rackham, 2006, 2009. License: GPL */
561
562// toclevels = 1..4.
563toc: function (toclevels) {
564
565 function getText(el) {
566 var text = "";
567 for (var i = el.firstChild; i != null; i = i.nextSibling) {
568 if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
569 text += i.data;
570 else if (i.firstChild != null)
571 text += getText(i);
572 }
573 return text;
574 }
575
576 function TocEntry(el, text, toclevel) {
577 this.element = el;
578 this.text = text;
579 this.toclevel = toclevel;
580 }
581
582 function tocEntries(el, toclevels) {
583 var result = new Array;
Junio C Hamanobc8d4782014-01-13 23:35:50584 var re = new RegExp('[hH]([2-'+(toclevels+1)+'])');
Junio C Hamanof2b74942012-11-20 21:06:26585 // Function that scans the DOM tree for header elements (the DOM2
586 // nodeIterator API would be a better technique but not supported by all
587 // browsers).
588 var iterate = function (el) {
589 for (var i = el.firstChild; i != null; i = i.nextSibling) {
590 if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
591 var mo = re.exec(i.tagName);
592 if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
593 result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
594 }
595 iterate(i);
596 }
597 }
598 }
599 iterate(el);
600 return result;
601 }
602
603 var toc = document.getElementById("toc");
Junio C Hamano9d971152012-12-19 00:43:11604 if (!toc) {
605 return;
606 }
607
608 // Delete existing TOC entries in case we're reloading the TOC.
609 var tocEntriesToRemove = [];
610 var i;
611 for (i = 0; i < toc.childNodes.length; i++) {
612 var entry = toc.childNodes[i];
Junio C Hamanobc8d4782014-01-13 23:35:50613 if (entry.nodeName == 'div'
Junio C Hamano9d971152012-12-19 00:43:11614 && entry.getAttribute("class")
615 && entry.getAttribute("class").match(/^toclevel/))
616 tocEntriesToRemove.push(entry);
617 }
618 for (i = 0; i < tocEntriesToRemove.length; i++) {
619 toc.removeChild(tocEntriesToRemove[i]);
620 }
621
622 // Rebuild TOC entries.
Junio C Hamanof2b74942012-11-20 21:06:26623 var entries = tocEntries(document.getElementById("content"), toclevels);
624 for (var i = 0; i < entries.length; ++i) {
625 var entry = entries[i];
626 if (entry.element.id == "")
627 entry.element.id = "_toc_" + i;
628 var a = document.createElement("a");
629 a.href = "#" + entry.element.id;
630 a.appendChild(document.createTextNode(entry.text));
631 var div = document.createElement("div");
632 div.appendChild(a);
633 div.className = "toclevel" + entry.toclevel;
634 toc.appendChild(div);
635 }
636 if (entries.length == 0)
637 toc.parentNode.removeChild(toc);
638},
639
640
641/////////////////////////////////////////////////////////////////////
642// Footnotes generator
643/////////////////////////////////////////////////////////////////////
644
645/* Based on footnote generation code from:
646 * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
647 */
648
649footnotes: function () {
Junio C Hamano9d971152012-12-19 00:43:11650 // Delete existing footnote entries in case we're reloading the footnodes.
651 var i;
Junio C Hamanof2b74942012-11-20 21:06:26652 var noteholder = document.getElementById("footnotes");
Junio C Hamano9d971152012-12-19 00:43:11653 if (!noteholder) {
654 return;
655 }
656 var entriesToRemove = [];
657 for (i = 0; i < noteholder.childNodes.length; i++) {
658 var entry = noteholder.childNodes[i];
Junio C Hamanobc8d4782014-01-13 23:35:50659 if (entry.nodeName == 'div' && entry.getAttribute("class") == "footnote")
Junio C Hamano9d971152012-12-19 00:43:11660 entriesToRemove.push(entry);
661 }
662 for (i = 0; i < entriesToRemove.length; i++) {
663 noteholder.removeChild(entriesToRemove[i]);
664 }
665
666 // Rebuild footnote entries.
667 var cont = document.getElementById("content");
Junio C Hamanof2b74942012-11-20 21:06:26668 var spans = cont.getElementsByTagName("span");
669 var refs = {};
670 var n = 0;
671 for (i=0; i<spans.length; i++) {
672 if (spans[i].className == "footnote") {
673 n++;
Junio C Hamano9d971152012-12-19 00:43:11674 var note = spans[i].getAttribute("data-note");
675 if (!note) {
676 // Use [\s\S] in place of . so multi-line matches work.
677 // Because JavaScript has no s (dotall) regex flag.
678 note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
679 spans[i].innerHTML =
680 "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
681 "' title='View footnote' class='footnote'>" + n + "</a>]";
682 spans[i].setAttribute("data-note", note);
683 }
Junio C Hamanof2b74942012-11-20 21:06:26684 noteholder.innerHTML +=
685 "<div class='footnote' id='_footnote_" + n + "'>" +
686 "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
687 n + "</a>. " + note + "</div>";
Junio C Hamanof2b74942012-11-20 21:06:26688 var id =spans[i].getAttribute("id");
689 if (id != null) refs["#"+id] = n;
690 }
691 }
692 if (n == 0)
693 noteholder.parentNode.removeChild(noteholder);
694 else {
695 // Process footnoterefs.
696 for (i=0; i<spans.length; i++) {
697 if (spans[i].className == "footnoteref") {
698 var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
699 href = href.match(/#.*/)[0]; // Because IE return full URL.
700 n = refs[href];
701 spans[i].innerHTML =
702 "[<a href='#_footnote_" + n +
703 "' title='View footnote' class='footnote'>" + n + "</a>]";
704 }
705 }
706 }
Junio C Hamano9d971152012-12-19 00:43:11707},
708
709install: function(toclevels) {
710 var timerId;
711
712 function reinstall() {
713 asciidoc.footnotes();
714 if (toclevels) {
715 asciidoc.toc(toclevels);
716 }
717 }
718
719 function reinstallAndRemoveTimer() {
720 clearInterval(timerId);
721 reinstall();
722 }
723
724 timerId = setInterval(reinstall, 500);
725 if (document.addEventListener)
726 document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
727 else
728 window.onload = reinstallAndRemoveTimer;
Junio C Hamanof2b74942012-11-20 21:06:26729}
730
731}
Junio C Hamano9d971152012-12-19 00:43:11732asciidoc.install();
Junio C Hamanof2b74942012-11-20 21:06:26733/*]]>*/
734</script>
735</head>
Junio C Hamano9d971152012-12-19 00:43:11736<body class="article">
Junio C Hamanof2b74942012-11-20 21:06:26737<div id="header">
Junio C Hamanobc8d4782014-01-13 23:35:50738<h1>Concerning Git&#8217;s Packing Heuristics</h1>
Junio C Hamanof2b74942012-11-20 21:06:26739</div>
740<div id="content">
Junio C Hamanobc8d4782014-01-13 23:35:50741<div id="preamble">
742<div class="sectionbody">
Junio C Hamanof2b74942012-11-20 21:06:26743<div class="literalblock">
744<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50745<pre><tt>Oh, here's a really stupid question:</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26746</div></div>
747<div class="literalblock">
748<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50749<pre><tt> Where do I go
Junio C Hamanof2b74942012-11-20 21:06:26750 to learn the details
Junio C Hamanobc8d4782014-01-13 23:35:50751of Git's packing heuristics?</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26752</div></div>
753<div class="paragraph"><p>Be careful what you ask!</p></div>
Junio C Hamano076ffcc2013-02-06 05:13:21754<div class="paragraph"><p>Followers of the Git, please open the Git IRC Log and turn to
Junio C Hamanof2b74942012-11-20 21:06:26755February 10, 2006.</p></div>
756<div class="paragraph"><p>It&#8217;s a rare occasion, and we are joined by the King Git Himself,
757Linus Torvalds (linus). Nathaniel Smith, (njs`), has the floor
758and seeks enlightenment. Others are present, but silent.</p></div>
759<div class="paragraph"><p>Let&#8217;s listen in!</p></div>
760<div class="literalblock">
761<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50762<pre><tt>&lt;njs`&gt; Oh, here's a really stupid question -- where do I go to
Junio C Hamano076ffcc2013-02-06 05:13:21763 learn the details of Git's packing heuristics? google avails
Junio C Hamanof2b74942012-11-20 21:06:26764 me not, reading the source didn't help a lot, and wading
765 through the whole mailing list seems less efficient than any
Junio C Hamanobc8d4782014-01-13 23:35:50766 of that.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26767</div></div>
768<div class="paragraph"><p>It is a bold start! A plea for help combined with a simultaneous
769tri-part attack on some of the tried and true mainstays in the quest
770for enlightenment. Brash accusations of google being useless. Hubris!
771Maligning the source. Heresy! Disdain for the mailing list archives.
772Woe.</p></div>
773<div class="literalblock">
774<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50775<pre><tt>&lt;pasky&gt; yes, the packing-related delta stuff is somewhat
776 mysterious even for me ;)</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26777</div></div>
778<div class="paragraph"><p>Ah! Modesty after all.</p></div>
779<div class="literalblock">
780<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50781<pre><tt>&lt;linus&gt; njs, I don't think the docs exist. That's something where
Junio C Hamanof2b74942012-11-20 21:06:26782 I don't think anybody else than me even really got involved.
Junio C Hamano076ffcc2013-02-06 05:13:21783 Most of the rest of Git others have been busy with (especially
Junio C Hamanobc8d4782014-01-13 23:35:50784 Junio), but packing nobody touched after I did it.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26785</div></div>
786<div class="paragraph"><p>It&#8217;s cryptic, yet vague. Linus in style for sure. Wise men
787interpret this as an apology. A few argue it is merely a
788statement of fact.</p></div>
789<div class="literalblock">
790<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50791<pre><tt>&lt;njs`&gt; I guess the next step is "read the source again", but I
792 have to build up a certain level of gumption first :-)</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26793</div></div>
794<div class="paragraph"><p>Indeed! On both points.</p></div>
795<div class="literalblock">
796<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50797<pre><tt>&lt;linus&gt; The packing heuristic is actually really really simple.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26798</div></div>
799<div class="paragraph"><p>Bait&#8230;</p></div>
800<div class="literalblock">
801<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50802<pre><tt>&lt;linus&gt; But strange.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26803</div></div>
804<div class="paragraph"><p>And switch. That ought to do it!</p></div>
805<div class="literalblock">
806<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50807<pre><tt>&lt;linus&gt; Remember: Git really doesn't follow files. So what it does is
Junio C Hamanof2b74942012-11-20 21:06:26808 - generate a list of all objects
809 - sort the list according to magic heuristics
810 - walk the list, using a sliding window, seeing if an object
811 can be diffed against another object in the window
Junio C Hamanobc8d4782014-01-13 23:35:50812 - write out the list in recency order</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26813</div></div>
814<div class="paragraph"><p>The traditional understatement:</p></div>
815<div class="literalblock">
816<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50817<pre><tt>&lt;njs`&gt; I suspect that what I'm missing is the precise definition of
818 the word "magic"</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26819</div></div>
820<div class="paragraph"><p>The traditional insight:</p></div>
821<div class="literalblock">
822<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50823<pre><tt>&lt;pasky&gt; yes</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26824</div></div>
825<div class="paragraph"><p>And Babel-like confusion flowed.</p></div>
826<div class="literalblock">
827<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50828<pre><tt>&lt;njs`&gt; oh, hmm, and I'm not sure what this sliding window means either</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26829</div></div>
830<div class="literalblock">
831<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50832<pre><tt>&lt;pasky&gt; iirc, it appeared to me to be just the sha1 of the object
833 when reading the code casually ...</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26834</div></div>
835<div class="olist lowerroman"><ol class="lowerroman">
836<li>
837<p>
838which simply doesn&#8217;t sound as a very good heuristics, though ;)
839</p>
840<div class="literalblock">
841<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50842<pre><tt>&lt;njs`&gt; .....and recency order. okay, I think it's clear I didn't
843 even realize how much I wasn't realizing :-)</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26844</div></div>
845</li>
846</ol></div>
847<div class="paragraph"><p>Ah, grasshopper! And thus the enlightenment begins anew.</p></div>
848<div class="literalblock">
849<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50850<pre><tt>&lt;linus&gt; The "magic" is actually in theory totally arbitrary.
Junio C Hamanof2b74942012-11-20 21:06:26851 ANY order will give you a working pack, but no, it's not
Junio C Hamanobc8d4782014-01-13 23:35:50852 ordered by SHA-1.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26853</div></div>
854<div class="literalblock">
855<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50856<pre><tt>Before talking about the ordering for the sliding delta
Junio C Hamanof2b74942012-11-20 21:06:26857window, let's talk about the recency order. That's more
Junio C Hamanobc8d4782014-01-13 23:35:50858important in one way.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26859</div></div>
860<div class="literalblock">
861<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50862<pre><tt>&lt;njs`&gt; Right, but if all you want is a working way to pack things
Junio C Hamanof2b74942012-11-20 21:06:26863 together, you could just use cat and save yourself some
Junio C Hamanobc8d4782014-01-13 23:35:50864 trouble...</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26865</div></div>
866<div class="paragraph"><p>Waaait for it&#8230;.</p></div>
867<div class="literalblock">
868<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50869<pre><tt>&lt;linus&gt; The recency ordering (which is basically: put objects
Junio C Hamanof2b74942012-11-20 21:06:26870 _physically_ into the pack in the order that they are
Junio C Hamanobc8d4782014-01-13 23:35:50871 "reachable" from the head) is important.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26872</div></div>
873<div class="literalblock">
874<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50875<pre><tt>&lt;njs`&gt; okay</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26876</div></div>
877<div class="literalblock">
878<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50879<pre><tt>&lt;linus&gt; It's important because that's the thing that gives packs
Junio C Hamanof2b74942012-11-20 21:06:26880 good locality. It keeps the objects close to the head (whether
881 they are old or new, but they are _reachable_ from the head)
882 at the head of the pack. So packs actually have absolutely
Junio C Hamanobc8d4782014-01-13 23:35:50883 _wonderful_ IO patterns.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26884</div></div>
885<div class="paragraph"><p>Read that again, because it is important.</p></div>
886<div class="literalblock">
887<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50888<pre><tt>&lt;linus&gt; But recency ordering is totally useless for deciding how
Junio C Hamanof2b74942012-11-20 21:06:26889 to actually generate the deltas, so the delta ordering is
Junio C Hamanobc8d4782014-01-13 23:35:50890 something else.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26891</div></div>
892<div class="literalblock">
893<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50894<pre><tt>The delta ordering is (wait for it):
Junio C Hamanof2b74942012-11-20 21:06:26895- first sort by the "basename" of the object, as defined by
896 the name the object was _first_ reached through when
897 generating the object list
898- within the same basename, sort by size of the object
Junio C Hamanobc8d4782014-01-13 23:35:50899- but always sort different types separately (commits first).</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26900</div></div>
901<div class="literalblock">
902<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50903<pre><tt>That's not exactly it, but it's very close.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26904</div></div>
905<div class="literalblock">
906<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50907<pre><tt>&lt;njs`&gt; The "_first_ reached" thing is not too important, just you
Junio C Hamanof2b74942012-11-20 21:06:26908 need some way to break ties since the same objects may be
Junio C Hamanobc8d4782014-01-13 23:35:50909 reachable many ways, yes?</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26910</div></div>
911<div class="paragraph"><p>And as if to clarify:</p></div>
912<div class="literalblock">
913<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50914<pre><tt>&lt;linus&gt; The point is that it's all really just any random
Junio C Hamanof2b74942012-11-20 21:06:26915 heuristic, and the ordering is totally unimportant for
916 correctness, but it helps a lot if the heuristic gives
917 "clumping" for things that are likely to delta well against
Junio C Hamanobc8d4782014-01-13 23:35:50918 each other.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26919</div></div>
920<div class="paragraph"><p>It is an important point, so secretly, I did my own research and have
921included my results below. To be fair, it has changed some over time.
922And through the magic of Revisionistic History, I draw upon this entry
923from The Git IRC Logs on my father&#8217;s birthday, March 1:</p></div>
924<div class="literalblock">
925<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50926<pre><tt>&lt;gitster&gt; The quote from the above linus should be rewritten a
Junio C Hamanof2b74942012-11-20 21:06:26927 bit (wait for it):
928 - first sort by type. Different objects never delta with
929 each other.
930 - then sort by filename/dirname. hash of the basename
931 occupies the top BITS_PER_INT-DIR_BITS bits, and bottom
932 DIR_BITS are for the hash of leading path elements.
933 - then if we are doing "thin" pack, the objects we are _not_
934 going to pack but we know about are sorted earlier than
935 other objects.
Junio C Hamanobc8d4782014-01-13 23:35:50936 - and finally sort by size, larger to smaller.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26937</div></div>
938<div class="paragraph"><p>In one swell-foop, clarification and obscurification! Nonetheless,
939authoritative. Cryptic, yet concise. It even solicits notions of
940quotes from The Source Code. Clearly, more study is needed.</p></div>
941<div class="literalblock">
942<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50943<pre><tt>&lt;gitster&gt; That's the sort order. What this means is:
Junio C Hamanof2b74942012-11-20 21:06:26944 - we do not delta different object types.
945 - we prefer to delta the objects with the same full path, but
946 allow files with the same name from different directories.
947 - we always prefer to delta against objects we are not going
948 to send, if there are some.
949 - we prefer to delta against larger objects, so that we have
Junio C Hamanobc8d4782014-01-13 23:35:50950 lots of removals.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26951</div></div>
952<div class="literalblock">
953<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50954<pre><tt>The penultimate rule is for "thin" packs. It is used when
955the other side is known to have such objects.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26956</div></div>
957<div class="paragraph"><p>There it is again. "Thin" packs. I&#8217;m thinking to myself, "What
958is a <em>thin</em> pack?" So I ask:</p></div>
959<div class="literalblock">
960<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50961<pre><tt>&lt;jdl&gt; What is a "thin" pack?</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26962</div></div>
963<div class="literalblock">
964<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50965<pre><tt>&lt;gitster&gt; Use of --objects-edge to rev-list as the upstream of
966 pack-objects. The pack transfer protocol negotiates that.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26967</div></div>
968<div class="paragraph"><p>Woo hoo! Cleared that <em>right</em> up!</p></div>
969<div class="literalblock">
970<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50971<pre><tt>&lt;gitster&gt; There are two directions - push and fetch.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26972</div></div>
973<div class="paragraph"><p>There! Did you see it? It is not <em>"push" and "pull"</em>! How often the
974confusion has started here. So casually mentioned, too!</p></div>
975<div class="literalblock">
976<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50977<pre><tt>&lt;gitster&gt; For push, git-send-pack invokes git-receive-pack on the
Junio C Hamanof2b74942012-11-20 21:06:26978 other end. The receive-pack says "I have up to these commits".
979 send-pack looks at them, and computes what are missing from
Junio C Hamanobc8d4782014-01-13 23:35:50980 the other end. So "thin" could be the default there.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26981</div></div>
982<div class="literalblock">
983<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50984<pre><tt>In the other direction, fetch, git-fetch-pack and
Junio C Hamanof2b74942012-11-20 21:06:26985git-clone-pack invokes git-upload-pack on the other end
Junio C Hamanobc8d4782014-01-13 23:35:50986(via ssh or by talking to the daemon).</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26987</div></div>
988<div class="literalblock">
989<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50990<pre><tt>There are two cases: fetch-pack with -k and clone-pack is one,
Junio C Hamanof2b74942012-11-20 21:06:26991fetch-pack without -k is the other. clone-pack and fetch-pack
992with -k will keep the downloaded packfile without expanded, so
993we do not use thin pack transfer. Otherwise, the generated
Junio C Hamanobc8d4782014-01-13 23:35:50994pack will have delta without base object in the same pack.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:26995</div></div>
996<div class="literalblock">
997<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:50998<pre><tt>But fetch-pack without -k will explode the received pack into
Junio C Hamanof2b74942012-11-20 21:06:26999individual objects, so we automatically ask upload-pack to
Junio C Hamanobc8d4782014-01-13 23:35:501000give us a thin pack if upload-pack supports it.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261001</div></div>
1002<div class="paragraph"><p>OK then.</p></div>
1003<div class="paragraph"><p>Uh.</p></div>
1004<div class="paragraph"><p>Let&#8217;s return to the previous conversation still in progress.</p></div>
1005<div class="literalblock">
1006<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501007<pre><tt>&lt;njs`&gt; and "basename" means something like "the tail of end of
Junio C Hamanof2b74942012-11-20 21:06:261008 path of file objects and dir objects, as per basename(3), and
1009 we just declare all commit and tag objects to have the same
Junio C Hamanobc8d4782014-01-13 23:35:501010 basename" or something?</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261011</div></div>
1012<div class="paragraph"><p>Luckily, that too is a point that gitster clarified for us!</p></div>
1013<div class="paragraph"><p>If I might add, the trick is to make files that <em>might</em> be similar be
1014located close to each other in the hash buckets based on their file
1015names. It used to be that "foo/Makefile", "bar/baz/quux/Makefile" and
1016"Makefile" all landed in the same bucket due to their common basename,
1017"Makefile". However, now they land in "close" buckets.</p></div>
1018<div class="paragraph"><p>The algorithm allows not just for the <em>same</em> bucket, but for <em>close</em>
1019buckets to be considered delta candidates. The rationale is
1020essentially that files, like Makefiles, often have very similar
1021content no matter what directory they live in.</p></div>
1022<div class="literalblock">
1023<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501024<pre><tt>&lt;linus&gt; I played around with different delta algorithms, and with
Junio C Hamanof2b74942012-11-20 21:06:261025 making the "delta window" bigger, but having too big of a
1026 sliding window makes it very expensive to generate the pack:
Junio C Hamanobc8d4782014-01-13 23:35:501027 you need to compare every object with a _ton_ of other objects.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261028</div></div>
1029<div class="literalblock">
1030<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501031<pre><tt>There are a number of other trivial heuristics too, which
Junio C Hamanof2b74942012-11-20 21:06:261032basically boil down to "don't bother even trying to delta this
1033pair" if we can tell before-hand that the delta isn't worth it
1034(due to size differences, where we can take a previous delta
1035result into account to decide that "ok, no point in trying
Junio C Hamanobc8d4782014-01-13 23:35:501036that one, it will be worse").</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261037</div></div>
1038<div class="literalblock">
1039<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501040<pre><tt>End result: packing is actually very size efficient. It's
Junio C Hamanof2b74942012-11-20 21:06:261041somewhat CPU-wasteful, but on the other hand, since you're
1042really only supposed to do it maybe once a month (and you can
Junio C Hamanobc8d4782014-01-13 23:35:501043do it during the night), nobody really seems to care.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261044</div></div>
1045<div class="paragraph"><p>Nice Engineering Touch, there. Find when it doesn&#8217;t matter, and
1046proclaim it a non-issue. Good style too!</p></div>
1047<div class="literalblock">
1048<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501049<pre><tt>&lt;njs`&gt; So, just to repeat to see if I'm following, we start by
Junio C Hamanof2b74942012-11-20 21:06:261050 getting a list of the objects we want to pack, we sort it by
1051 this heuristic (basically lexicographically on the tuple
Junio C Hamanobc8d4782014-01-13 23:35:501052 (type, basename, size)).</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261053</div></div>
1054<div class="literalblock">
1055<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501056<pre><tt>Then we walk through this list, and calculate a delta of
Junio C Hamanof2b74942012-11-20 21:06:261057each object against the last n (tunable parameter) objects,
Junio C Hamanobc8d4782014-01-13 23:35:501058and pick the smallest of these deltas.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261059</div></div>
1060<div class="paragraph"><p>Vastly simplified, but the essence is there!</p></div>
1061<div class="literalblock">
1062<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501063<pre><tt>&lt;linus&gt; Correct.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261064</div></div>
1065<div class="literalblock">
1066<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501067<pre><tt>&lt;njs`&gt; And then once we have picked a delta or fulltext to
Junio C Hamanof2b74942012-11-20 21:06:261068 represent each object, we re-sort by recency, and write them
Junio C Hamanobc8d4782014-01-13 23:35:501069 out in that order.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261070</div></div>
1071<div class="literalblock">
1072<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501073<pre><tt>&lt;linus&gt; Yup. Some other small details:</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261074</div></div>
1075<div class="paragraph"><p>And of course there is the "Other Shoe" Factor too.</p></div>
1076<div class="literalblock">
1077<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501078<pre><tt>&lt;linus&gt; - We limit the delta depth to another magic value (right
1079 now both the window and delta depth magic values are just "10")</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261080</div></div>
1081<div class="literalblock">
1082<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501083<pre><tt>&lt;njs`&gt; Hrm, my intuition is that you'd end up with really _bad_ IO
Junio C Hamanof2b74942012-11-20 21:06:261084 patterns, because the things you want are near by, but to
1085 actually reconstruct them you may have to jump all over in
Junio C Hamanobc8d4782014-01-13 23:35:501086 random ways.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261087</div></div>
1088<div class="literalblock">
1089<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501090<pre><tt>&lt;linus&gt; - When we write out a delta, and we haven't yet written
Junio C Hamanof2b74942012-11-20 21:06:261091 out the object it is a delta against, we write out the base
1092 object first. And no, when we reconstruct them, we actually
1093 get nice IO patterns, because:
1094 - larger objects tend to be "more recent" (Linus' law: files grow)
1095 - we actively try to generate deltas from a larger object to a
1096 smaller one
1097 - this means that the top-of-tree very seldom has deltas
Junio C Hamanobc8d4782014-01-13 23:35:501098 (i.e. deltas in _practice_ are "backwards deltas")</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261099</div></div>
1100<div class="paragraph"><p>Again, we should reread that whole paragraph. Not just because
1101Linus has slipped Linus&#8217;s Law in there on us, but because it is
1102important. Let&#8217;s make sure we clarify some of the points here:</p></div>
1103<div class="literalblock">
1104<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501105<pre><tt>&lt;njs`&gt; So the point is just that in practice, delta order and
1106 recency order match each other quite well.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261107</div></div>
1108<div class="literalblock">
1109<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501110<pre><tt>&lt;linus&gt; Yes. There's another nice side to this (and yes, it was
Junio C Hamanof2b74942012-11-20 21:06:261111 designed that way ;):
1112 - the reason we generate deltas against the larger object is
Junio C Hamanobc8d4782014-01-13 23:35:501113 actually a big space saver too!</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261114</div></div>
1115<div class="literalblock">
1116<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501117<pre><tt>&lt;njs`&gt; Hmm, but your last comment (if "we haven't yet written out
Junio C Hamanof2b74942012-11-20 21:06:261118 the object it is a delta against, we write out the base object
1119 first"), seems like it would make these facts mostly
1120 irrelevant because even if in practice you would not have to
1121 wander around much, in fact you just brute-force say that in
Junio C Hamanobc8d4782014-01-13 23:35:501122 the cases where you might have to wander, don't do that :-)</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261123</div></div>
1124<div class="literalblock">
1125<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501126<pre><tt>&lt;linus&gt; Yes and no. Notice the rule: we only write out the base
Junio C Hamanof2b74942012-11-20 21:06:261127 object first if the delta against it was more recent. That
1128 means that you can actually have deltas that refer to a base
1129 object that is _not_ close to the delta object, but that only
Junio C Hamanobc8d4782014-01-13 23:35:501130 happens when the delta is needed to generate an _old_ object.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261131</div></div>
1132<div class="literalblock">
1133<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501134<pre><tt>&lt;linus&gt; See?</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261135</div></div>
1136<div class="paragraph"><p>Yeah, no. I missed that on the first two or three readings myself.</p></div>
1137<div class="literalblock">
1138<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501139<pre><tt>&lt;linus&gt; This keeps the front of the pack dense. The front of the
Junio C Hamanof2b74942012-11-20 21:06:261140 pack never contains data that isn't relevant to a "recent"
1141 object. The size optimization comes from our use of xdelta
1142 (but is true for many other delta algorithms): removing data
Junio C Hamanobc8d4782014-01-13 23:35:501143 is cheaper (in size) than adding data.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261144</div></div>
1145<div class="literalblock">
1146<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501147<pre><tt>When you remove data, you only need to say "copy bytes n--m".
Junio C Hamanof2b74942012-11-20 21:06:261148In contrast, in a delta that _adds_ data, you have to say "add
Junio C Hamanobc8d4782014-01-13 23:35:501149these bytes: 'actual data goes here'"</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261150</div></div>
1151<div class="ulist"><ul>
1152<li>
1153<p>
1154njs` has quit: Read error: 104 (Connection reset by peer)
1155</p>
1156<div class="literalblock">
1157<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501158<pre><tt>&lt;linus&gt; Uhhuh. I hope I didn't blow njs` mind.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261159</div></div>
1160</li>
1161<li>
1162<p>
1163njs` has joined channel #git
1164</p>
1165<div class="literalblock">
1166<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501167<pre><tt>&lt;pasky&gt; :)</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261168</div></div>
1169</li>
1170</ul></div>
1171<div class="paragraph"><p>The silent observers are amused. Of course.</p></div>
1172<div class="paragraph"><p>And as if njs` was expected to be omniscient:</p></div>
1173<div class="literalblock">
1174<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501175<pre><tt>&lt;linus&gt; njs - did you miss anything?</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261176</div></div>
1177<div class="paragraph"><p>OK, I&#8217;ll spell it out. That&#8217;s Geek Humor. If njs` was not actually
1178connected for a little bit there, how would he know if missed anything
1179while he was disconnected? He&#8217;s a benevolent dictator with a sense of
1180humor! Well noted!</p></div>
1181<div class="literalblock">
1182<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501183<pre><tt>&lt;njs`&gt; Stupid router. Or gremlins, or whatever.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261184</div></div>
1185<div class="paragraph"><p>It&#8217;s a cheap shot at Cisco. Take 'em when you can.</p></div>
1186<div class="literalblock">
1187<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501188<pre><tt>&lt;njs`&gt; Yes and no. Notice the rule: we only write out the base
1189 object first if the delta against it was more recent.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261190</div></div>
1191<div class="literalblock">
1192<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501193<pre><tt>I'm getting lost in all these orders, let me re-read :-)
Junio C Hamanof2b74942012-11-20 21:06:261194So the write-out order is from most recent to least recent?
1195(Conceivably it could be the opposite way too, I'm not sure if
1196we've said) though my connection back at home is logging, so I
Junio C Hamanobc8d4782014-01-13 23:35:501197can just read what you said there :-)</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261198</div></div>
1199<div class="paragraph"><p>And for those of you paying attention, the Omniscient Trick has just
1200been detailed!</p></div>
1201<div class="literalblock">
1202<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501203<pre><tt>&lt;linus&gt; Yes, we always write out most recent first</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261204</div></div>
Junio C Hamanof2b74942012-11-20 21:06:261205<div class="literalblock">
1206<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501207<pre><tt>&lt;njs`&gt; And, yeah, I got the part about deeper-in-history stuff
1208 having worse IO characteristics, one sort of doesn't care.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261209</div></div>
1210<div class="literalblock">
1211<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501212<pre><tt>&lt;linus&gt; With the caveat that if the "most recent" needs an older
Junio C Hamanof2b74942012-11-20 21:06:261213 object to delta against (hey, shrinking sometimes does
Junio C Hamanobc8d4782014-01-13 23:35:501214 happen), we write out the old object with the delta.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261215</div></div>
1216<div class="literalblock">
1217<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501218<pre><tt>&lt;njs`&gt; (if only it happened more...)</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261219</div></div>
1220<div class="literalblock">
1221<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501222<pre><tt>&lt;linus&gt; Anyway, the pack-file could easily be denser still, but
Junio C Hamano076ffcc2013-02-06 05:13:211223 because it's used both for streaming (the Git protocol) and
Junio C Hamanobc8d4782014-01-13 23:35:501224 for on-disk, it has a few pessimizations.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261225</div></div>
1226<div class="paragraph"><p>Actually, it is a made-up word. But it is a made-up word being
1227used as setup for a later optimization, which is a real word:</p></div>
1228<div class="literalblock">
1229<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501230<pre><tt>&lt;linus&gt; In particular, while the pack-file is then compressed,
Junio C Hamanof2b74942012-11-20 21:06:261231 it's compressed just one object at a time, so the actual
1232 compression factor is less than it could be in theory. But it
1233 means that it's all nice random-access with a simple index to
Junio C Hamanobc8d4782014-01-13 23:35:501234 do "object name-&gt;location in packfile" translation.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261235</div></div>
1236<div class="literalblock">
1237<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501238<pre><tt>&lt;njs`&gt; I'm assuming the real win for delta-ing large-&gt;small is
1239 more homogeneous statistics for gzip to run over?</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261240</div></div>
1241<div class="literalblock">
1242<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501243<pre><tt>(You have to put the bytes in one place or another, but
1244putting them in a larger blob wins on compression)</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261245</div></div>
1246<div class="literalblock">
1247<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501248<pre><tt>Actually, what is the compression strategy -- each delta
Junio C Hamanof2b74942012-11-20 21:06:261249individually gzipped, the whole file gzipped, somewhere in
Junio C Hamanobc8d4782014-01-13 23:35:501250between, no compression at all, ....?</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261251</div></div>
1252<div class="literalblock">
1253<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501254<pre><tt>Right.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261255</div></div>
1256<div class="paragraph"><p>Reality IRC sets in. For example:</p></div>
1257<div class="literalblock">
1258<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501259<pre><tt>&lt;pasky&gt; I'll read the rest in the morning, I really have to go
Junio C Hamanof2b74942012-11-20 21:06:261260 sleep or there's no hope whatsoever for me at the today's
Junio C Hamanobc8d4782014-01-13 23:35:501261 exam... g'nite all.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261262</div></div>
1263<div class="paragraph"><p>Heh.</p></div>
1264<div class="literalblock">
1265<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501266<pre><tt>&lt;linus&gt; pasky: g'nite</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261267</div></div>
1268<div class="literalblock">
1269<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501270<pre><tt>&lt;njs`&gt; pasky: 'luck</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261271</div></div>
1272<div class="literalblock">
1273<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501274<pre><tt>&lt;linus&gt; Right: large-&gt;small matters exactly because of compression
Junio C Hamanof2b74942012-11-20 21:06:261275 behaviour. If it was non-compressed, it probably wouldn't make
Junio C Hamanobc8d4782014-01-13 23:35:501276 any difference.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261277</div></div>
1278<div class="literalblock">
1279<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501280<pre><tt>&lt;njs`&gt; yeah</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261281</div></div>
1282<div class="literalblock">
1283<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501284<pre><tt>&lt;linus&gt; Anyway: I'm not even trying to claim that the pack-files
Junio C Hamanof2b74942012-11-20 21:06:261285 are perfect, but they do tend to have a nice balance of
Junio C Hamanobc8d4782014-01-13 23:35:501286 density vs ease-of use.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261287</div></div>
1288<div class="paragraph"><p>Gasp! OK, saved. That&#8217;s a fair Engineering trade off. Close call!
1289In fact, Linus reflects on some Basic Engineering Fundamentals,
1290design options, etc.</p></div>
1291<div class="literalblock">
1292<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501293<pre><tt>&lt;linus&gt; More importantly, they allow Git to still _conceptually_
1294 never deal with deltas at all, and be a "whole object" store.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261295</div></div>
1296<div class="literalblock">
1297<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501298<pre><tt>Which has some problems (we discussed bad huge-file
Junio C Hamano076ffcc2013-02-06 05:13:211299behaviour on the Git lists the other day), but it does mean
1300that the basic Git concepts are really really simple and
Junio C Hamanobc8d4782014-01-13 23:35:501301straightforward.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261302</div></div>
1303<div class="literalblock">
1304<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501305<pre><tt>It's all been quite stable.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261306</div></div>
1307<div class="literalblock">
1308<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501309<pre><tt>Which I think is very much a result of having very simple
Junio C Hamanof2b74942012-11-20 21:06:261310basic ideas, so that there's never any confusion about what's
Junio C Hamanobc8d4782014-01-13 23:35:501311going on.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261312</div></div>
1313<div class="literalblock">
1314<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501315<pre><tt>Bugs happen, but they are "simple" bugs. And bugs that
Junio C Hamanof2b74942012-11-20 21:06:261316actually get some object store detail wrong are almost always
Junio C Hamanobc8d4782014-01-13 23:35:501317so obvious that they never go anywhere.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261318</div></div>
1319<div class="literalblock">
1320<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501321<pre><tt>&lt;njs`&gt; Yeah.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261322</div></div>
1323<div class="paragraph"><p>Nuff said.</p></div>
1324<div class="literalblock">
1325<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501326<pre><tt>&lt;linus&gt; Anyway. I'm off for bed. It's not 6AM here, but I've got
Junio C Hamanof2b74942012-11-20 21:06:261327 three kids, and have to get up early in the morning to send
Junio C Hamanobc8d4782014-01-13 23:35:501328 them off. I need my beauty sleep.</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261329</div></div>
1330<div class="literalblock">
1331<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501332<pre><tt>&lt;njs`&gt; :-)</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261333</div></div>
1334<div class="literalblock">
1335<div class="content">
Junio C Hamanobc8d4782014-01-13 23:35:501336<pre><tt>&lt;njs`&gt; appreciate the infodump, I really was failing to find the
1337 details on Git packs :-)</tt></pre>
Junio C Hamanof2b74942012-11-20 21:06:261338</div></div>
1339<div class="paragraph"><p>And now you know the rest of the story.</p></div>
1340</div>
Junio C Hamanobc8d4782014-01-13 23:35:501341</div>
1342</div>
Junio C Hamanof2b74942012-11-20 21:06:261343<div id="footnotes"><hr /></div>
1344<div id="footer">
1345<div id="footer-text">
Junio C Hamanobc8d4782014-01-13 23:35:501346Last updated 2014-01-13 15:35:15 PST
Junio C Hamanof2b74942012-11-20 21:06:261347</div>
1348</div>
1349</body>
1350</html>