blob: 1d5ca6c1a60305347e16fe9d5f3a6c7b83a14252 [file] [log] [blame]
Junio C Hamano11ae3202018-08-20 20:15:421<?xml version="1.0" encoding="UTF-8"?>
2<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
3 "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
4<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
5<head>
6<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
Junio C Hamanoa85030a2022-07-27 16:48:217<meta name="generator" content="AsciiDoc 10.2.0" />
Junio C Hamano11ae3202018-08-20 20:15:428<title>Partial Clone Design Notes</title>
9<style type="text/css">
10/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
11
12/* Default font. */
13body {
14 font-family: Georgia,serif;
15}
16
17/* Title font. */
18h1, h2, h3, h4, h5, h6,
19div.title, caption.title,
20thead, p.table.header,
21#toctitle,
22#author, #revnumber, #revdate, #revremark,
23#footer {
24 font-family: Arial,Helvetica,sans-serif;
25}
26
27body {
28 margin: 1em 5% 1em 5%;
29}
30
31a {
32 color: blue;
33 text-decoration: underline;
34}
35a:visited {
36 color: fuchsia;
37}
38
39em {
40 font-style: italic;
41 color: navy;
42}
43
44strong {
45 font-weight: bold;
46 color: #083194;
47}
48
49h1, h2, h3, h4, h5, h6 {
50 color: #527bbd;
51 margin-top: 1.2em;
52 margin-bottom: 0.5em;
53 line-height: 1.3;
54}
55
56h1, h2, h3 {
57 border-bottom: 2px solid silver;
58}
59h2 {
60 padding-top: 0.5em;
61}
62h3 {
63 float: left;
64}
65h3 + * {
66 clear: left;
67}
68h5 {
69 font-size: 1.0em;
70}
71
72div.sectionbody {
73 margin-left: 0;
74}
75
76hr {
77 border: 1px solid silver;
78}
79
80p {
81 margin-top: 0.5em;
82 margin-bottom: 0.5em;
83}
84
85ul, ol, li > p {
86 margin-top: 0;
87}
88ul > li { color: #aaa; }
89ul > li > * { color: black; }
90
91.monospaced, code, pre {
92 font-family: "Courier New", Courier, monospace;
93 font-size: inherit;
94 color: navy;
95 padding: 0;
96 margin: 0;
97}
98pre {
99 white-space: pre-wrap;
100}
101
102#author {
103 color: #527bbd;
104 font-weight: bold;
105 font-size: 1.1em;
106}
107#email {
108}
109#revnumber, #revdate, #revremark {
110}
111
112#footer {
113 font-size: small;
114 border-top: 2px solid silver;
115 padding-top: 0.5em;
116 margin-top: 4.0em;
117}
118#footer-text {
119 float: left;
120 padding-bottom: 0.5em;
121}
122#footer-badges {
123 float: right;
124 padding-bottom: 0.5em;
125}
126
127#preamble {
128 margin-top: 1.5em;
129 margin-bottom: 1.5em;
130}
131div.imageblock, div.exampleblock, div.verseblock,
132div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
133div.admonitionblock {
134 margin-top: 1.0em;
135 margin-bottom: 1.5em;
136}
137div.admonitionblock {
138 margin-top: 2.0em;
139 margin-bottom: 2.0em;
140 margin-right: 10%;
141 color: #606060;
142}
143
144div.content { /* Block element content. */
145 padding: 0;
146}
147
148/* Block element titles. */
149div.title, caption.title {
150 color: #527bbd;
151 font-weight: bold;
152 text-align: left;
153 margin-top: 1.0em;
154 margin-bottom: 0.5em;
155}
156div.title + * {
157 margin-top: 0;
158}
159
160td div.title:first-child {
161 margin-top: 0.0em;
162}
163div.content div.title:first-child {
164 margin-top: 0.0em;
165}
166div.content + div.title {
167 margin-top: 0.0em;
168}
169
170div.sidebarblock > div.content {
171 background: #ffffee;
172 border: 1px solid #dddddd;
173 border-left: 4px solid #f0f0f0;
174 padding: 0.5em;
175}
176
177div.listingblock > div.content {
178 border: 1px solid #dddddd;
179 border-left: 5px solid #f0f0f0;
180 background: #f8f8f8;
181 padding: 0.5em;
182}
183
184div.quoteblock, div.verseblock {
185 padding-left: 1.0em;
186 margin-left: 1.0em;
187 margin-right: 10%;
188 border-left: 5px solid #f0f0f0;
189 color: #888;
190}
191
192div.quoteblock > div.attribution {
193 padding-top: 0.5em;
194 text-align: right;
195}
196
197div.verseblock > pre.content {
198 font-family: inherit;
199 font-size: inherit;
200}
201div.verseblock > div.attribution {
202 padding-top: 0.75em;
203 text-align: left;
204}
205/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
206div.verseblock + div.attribution {
207 text-align: left;
208}
209
210div.admonitionblock .icon {
211 vertical-align: top;
212 font-size: 1.1em;
213 font-weight: bold;
214 text-decoration: underline;
215 color: #527bbd;
216 padding-right: 0.5em;
217}
218div.admonitionblock td.content {
219 padding-left: 0.5em;
220 border-left: 3px solid #dddddd;
221}
222
223div.exampleblock > div.content {
224 border-left: 3px solid #dddddd;
225 padding-left: 0.5em;
226}
227
228div.imageblock div.content { padding-left: 0; }
229span.image img { border-style: none; vertical-align: text-bottom; }
230a.image:visited { color: white; }
231
232dl {
233 margin-top: 0.8em;
234 margin-bottom: 0.8em;
235}
236dt {
237 margin-top: 0.5em;
238 margin-bottom: 0;
239 font-style: normal;
240 color: navy;
241}
242dd > *:first-child {
243 margin-top: 0.1em;
244}
245
246ul, ol {
247 list-style-position: outside;
248}
249ol.arabic {
250 list-style-type: decimal;
251}
252ol.loweralpha {
253 list-style-type: lower-alpha;
254}
255ol.upperalpha {
256 list-style-type: upper-alpha;
257}
258ol.lowerroman {
259 list-style-type: lower-roman;
260}
261ol.upperroman {
262 list-style-type: upper-roman;
263}
264
/* Tighten vertical spacing for lists, paragraphs and nested blocks inside
   a "compact" container. Each selector is listed once; the original list
   repeated "div.compact p" and "div.compact div", which matched nothing
   extra. */
div.compact ul, div.compact ol,
div.compact p,
div.compact div {
  margin-top: 0.1em;
  margin-bottom: 0.1em;
}
271
272tfoot {
273 font-weight: bold;
274}
275td > div.verse {
276 white-space: pre;
277}
278
279div.hdlist {
280 margin-top: 0.8em;
281 margin-bottom: 0.8em;
282}
283div.hdlist tr {
284 padding-bottom: 15px;
285}
286dt.hdlist1.strong, td.hdlist1.strong {
287 font-weight: bold;
288}
289td.hdlist1 {
290 vertical-align: top;
291 font-style: normal;
292 padding-right: 0.8em;
293 color: navy;
294}
295td.hdlist2 {
296 vertical-align: top;
297}
298div.hdlist.compact tr {
299 margin: 0;
300 padding-bottom: 0;
301}
302
303.comment {
304 background: yellow;
305}
306
307.footnote, .footnoteref {
308 font-size: 0.8em;
309}
310
311span.footnote, span.footnoteref {
312 vertical-align: super;
313}
314
315#footnotes {
316 margin: 20px 0 20px 0;
317 padding: 7px 0 0 0;
318}
319
320#footnotes div.footnote {
321 margin: 0 0 5px 0;
322}
323
324#footnotes hr {
325 border: none;
326 border-top: 1px solid silver;
327 height: 1px;
328 text-align: left;
329 margin-left: 0;
330 width: 20%;
331 min-width: 100px;
332}
333
334div.colist td {
335 padding-right: 0.5em;
336 padding-bottom: 0.3em;
337 vertical-align: top;
338}
339div.colist td img {
340 margin-top: 0.3em;
341}
342
343@media print {
344 #footer-badges { display: none; }
345}
346
347#toc {
348 margin-bottom: 2.5em;
349}
350
351#toctitle {
352 color: #527bbd;
353 font-size: 1.1em;
354 font-weight: bold;
355 margin-top: 1.0em;
356 margin-bottom: 0.1em;
357}
358
359div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
360 margin-top: 0;
361 margin-bottom: 0;
362}
363div.toclevel2 {
364 margin-left: 2em;
365 font-size: 0.9em;
366}
367div.toclevel3 {
368 margin-left: 4em;
369 font-size: 0.9em;
370}
371div.toclevel4 {
372 margin-left: 6em;
373 font-size: 0.9em;
374}
375
376span.aqua { color: aqua; }
377span.black { color: black; }
378span.blue { color: blue; }
379span.fuchsia { color: fuchsia; }
380span.gray { color: gray; }
381span.green { color: green; }
382span.lime { color: lime; }
383span.maroon { color: maroon; }
384span.navy { color: navy; }
385span.olive { color: olive; }
386span.purple { color: purple; }
387span.red { color: red; }
388span.silver { color: silver; }
389span.teal { color: teal; }
390span.white { color: white; }
391span.yellow { color: yellow; }
392
393span.aqua-background { background: aqua; }
394span.black-background { background: black; }
395span.blue-background { background: blue; }
396span.fuchsia-background { background: fuchsia; }
397span.gray-background { background: gray; }
398span.green-background { background: green; }
399span.lime-background { background: lime; }
400span.maroon-background { background: maroon; }
401span.navy-background { background: navy; }
402span.olive-background { background: olive; }
403span.purple-background { background: purple; }
404span.red-background { background: red; }
405span.silver-background { background: silver; }
406span.teal-background { background: teal; }
407span.white-background { background: white; }
408span.yellow-background { background: yellow; }
409
410span.big { font-size: 2em; }
411span.small { font-size: 0.6em; }
412
413span.underline { text-decoration: underline; }
414span.overline { text-decoration: overline; }
415span.line-through { text-decoration: line-through; }
416
417div.unbreakable { page-break-inside: avoid; }
418
419
420/*
421 * xhtml11 specific
422 *
423 * */
424
425div.tableblock {
426 margin-top: 1.0em;
427 margin-bottom: 1.5em;
428}
429div.tableblock > table {
430 border: 3px solid #527bbd;
431}
432thead, p.table.header {
433 font-weight: bold;
434 color: #527bbd;
435}
436p.table {
437 margin-top: 0;
438}
Junio C Hamano725b0da2020-01-22 22:02:40439/* Because the table frame attribute is overridden by CSS in most browsers. */
Junio C Hamano11ae3202018-08-20 20:15:42440div.tableblock > table[frame="void"] {
441 border-style: none;
442}
443div.tableblock > table[frame="hsides"] {
444 border-left-style: none;
445 border-right-style: none;
446}
447div.tableblock > table[frame="vsides"] {
448 border-top-style: none;
449 border-bottom-style: none;
450}
451
452
453/*
454 * html5 specific
455 *
456 * */
457
458table.tableblock {
459 margin-top: 1.0em;
460 margin-bottom: 1.5em;
461}
462thead, p.tableblock.header {
463 font-weight: bold;
464 color: #527bbd;
465}
466p.tableblock {
467 margin-top: 0;
468}
469table.tableblock {
470 border-width: 3px;
471 border-spacing: 0px;
472 border-style: solid;
473 border-color: #527bbd;
474 border-collapse: collapse;
475}
476th.tableblock, td.tableblock {
477 border-width: 1px;
478 padding: 4px;
479 border-style: solid;
480 border-color: #527bbd;
481}
482
483table.tableblock.frame-topbot {
484 border-left-style: hidden;
485 border-right-style: hidden;
486}
487table.tableblock.frame-sides {
488 border-top-style: hidden;
489 border-bottom-style: hidden;
490}
491table.tableblock.frame-none {
492 border-style: hidden;
493}
494
495th.tableblock.halign-left, td.tableblock.halign-left {
496 text-align: left;
497}
498th.tableblock.halign-center, td.tableblock.halign-center {
499 text-align: center;
500}
501th.tableblock.halign-right, td.tableblock.halign-right {
502 text-align: right;
503}
504
505th.tableblock.valign-top, td.tableblock.valign-top {
506 vertical-align: top;
507}
508th.tableblock.valign-middle, td.tableblock.valign-middle {
509 vertical-align: middle;
510}
511th.tableblock.valign-bottom, td.tableblock.valign-bottom {
512 vertical-align: bottom;
513}
514
515
516/*
517 * manpage specific
518 *
519 * */
520
521body.manpage h1 {
522 padding-top: 0.5em;
523 padding-bottom: 0.5em;
524 border-top: 2px solid silver;
525 border-bottom: 2px solid silver;
526}
527body.manpage h2 {
528 border-style: none;
529}
530body.manpage div.sectionbody {
531 margin-left: 3em;
532}
533
534@media print {
535 body.manpage div#toc { display: none; }
536}
537
538
539</style>
540<script type="text/javascript">
541/*<![CDATA[*/
var asciidoc = {  // Namespace.

/////////////////////////////////////////////////////////////////////
// Table Of Contents generator
/////////////////////////////////////////////////////////////////////

/* Author: Mihai Bazon, September 2002
 * http://students.infoiasi.ro/~mishoo
 *
 * Table Of Content generator
 * Version: 0.4
 *
 * Feel free to use this script under the terms of the GNU General Public
 * License, as long as you do not remove or alter this notice.
 */

 /* modified by Troy D. Hanson, September 2006. License: GPL */
 /* modified by Stuart Rackham, 2006, 2009. License: GPL */

/* Rebuild the table of contents inside the element with id "toc".
 *
 * Scans the element with id "content" for headers h1..h<toclevels+1>
 * (headers whose class attribute is exactly "float" are skipped) and
 * appends one <div class="toclevelN"><a>...</a></div> per header to #toc,
 * where N is the header level minus one. Headers without an id are given
 * a generated "_toc_<index>" id so that they can be linked to. If no
 * header is found, #toc is removed from the document.
 *
 * Does nothing when #toc or #content is not in the document (for example
 * because the page is still being parsed).
 *
 * toclevels = 1..4.
 */
toc: function (toclevels) {

  // Concatenated text of all text nodes below el, in document order.
  function getText(el) {
    var text = "";
    for (var i = el.firstChild; i != null; i = i.nextSibling) {
      if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
        text += i.data;
      else if (i.firstChild != null)
        text += getText(i);
    }
    return text;
  }

  // Record describing one header that gets a TOC line.
  function TocEntry(el, text, toclevel) {
    this.element = el;
    this.text = text;
    this.toclevel = toclevel;
  }

  // Collect a TocEntry for every matching header below el, depth first.
  function tocEntries(el, toclevels) {
    var result = [];
    var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
    // Function that scans the DOM tree for header elements (the DOM2
    // nodeIterator API would be a better technique but not supported by all
    // browsers).
    var iterate = function (el) {
      for (var i = el.firstChild; i != null; i = i.nextSibling) {
        if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
          var mo = re.exec(i.tagName);
          if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
            result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
          }
          iterate(i);
        }
      }
    };
    iterate(el);
    return result;
  }

  var toc = document.getElementById("toc");
  if (!toc) {
    return;
  }
  var content = document.getElementById("content");
  if (!content) {
    // Nothing to index yet; leave #toc alone so a later pass can fill it.
    return;
  }

  // Delete existing TOC entries in case we're reloading the TOC.
  // Collected first because toc.childNodes changes while nodes are removed.
  var stale = [];
  var i;
  for (i = 0; i < toc.childNodes.length; i++) {
    var child = toc.childNodes[i];
    if (child.nodeName.toLowerCase() == 'div'
     && child.getAttribute("class")
     && child.getAttribute("class").match(/^toclevel/))
      stale.push(child);
  }
  for (i = 0; i < stale.length; i++) {
    toc.removeChild(stale[i]);
  }

  // Rebuild TOC entries.
  var entries = tocEntries(content, toclevels);
  for (i = 0; i < entries.length; ++i) {
    var entry = entries[i];
    if (entry.element.id == "")
      entry.element.id = "_toc_" + i;
    var a = document.createElement("a");
    a.href = "#" + entry.element.id;
    a.appendChild(document.createTextNode(entry.text));
    var div = document.createElement("div");
    div.appendChild(a);
    div.className = "toclevel" + entry.toclevel;
    toc.appendChild(div);
  }
  if (entries.length == 0)
    toc.parentNode.removeChild(toc);
},


/////////////////////////////////////////////////////////////////////
// Footnotes generator
/////////////////////////////////////////////////////////////////////

/* Based on footnote generation code from:
 * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
 */

/* Rebuild the footnote list inside the element with id "footnotes".
 *
 * Every <span class="footnote"> below #content is numbered in document
 * order. On the first pass its "[...]" text is moved into a "data-note"
 * attribute and the span is replaced by a numbered link; on later passes
 * the note is read back from "data-note". Every <span class="footnoteref">
 * is rewritten to link to the footnote whose span id its anchor refers to.
 * If there are no footnotes, #footnotes is removed from the document.
 *
 * Safe to call repeatedly: references that were already rewritten (or that
 * point at an unknown id) are left untouched. Does nothing when
 * #footnotes or #content is not in the document.
 */
footnotes: function () {
  var i;
  var noteholder = document.getElementById("footnotes");
  if (!noteholder) {
    return;
  }
  var cont = document.getElementById("content");
  if (!cont) {
    return;
  }

  // Delete existing footnote entries in case we're reloading the footnotes.
  var entriesToRemove = [];
  for (i = 0; i < noteholder.childNodes.length; i++) {
    var entry = noteholder.childNodes[i];
    if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
      entriesToRemove.push(entry);
  }
  for (i = 0; i < entriesToRemove.length; i++) {
    noteholder.removeChild(entriesToRemove[i]);
  }

  // Rebuild footnote entries.
  var spans = cont.getElementsByTagName("span");
  var refs = {};       // "#<footnote span id>" -> footnote number
  var n = 0;           // number of footnotes found so far
  var notesHtml = "";  // markup for all entries, inserted in one go below
  for (i = 0; i < spans.length; i++) {
    if (spans[i].className != "footnote")
      continue;
    var note = spans[i].getAttribute("data-note");
    var firstPass = !note;
    if (firstPass) {
      // Use [\s\S] in place of . so multi-line matches work.
      // Because JavaScript has no s (dotall) regex flag.
      var mo = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/);
      if (!mo)
        continue;  // Not in the expected "[...]" form; leave the span as is.
      note = mo[1];
    }
    n++;
    if (firstPass) {
      spans[i].innerHTML =
        "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
        "' title='View footnote' class='footnote'>" + n + "</a>]";
      spans[i].setAttribute("data-note", note);
    }
    notesHtml +=
      "<div class='footnote' id='_footnote_" + n + "'>" +
      "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
      n + "</a>. " + note + "</div>";
    var id = spans[i].getAttribute("id");
    if (id != null) refs["#" + id] = n;
  }

  if (n == 0) {
    noteholder.parentNode.removeChild(noteholder);
    return;
  }

  // One innerHTML update instead of one re-parse of the holder per note.
  noteholder.innerHTML += notesHtml;

  // Process footnoterefs.
  for (i = 0; i < spans.length; i++) {
    if (spans[i].className != "footnoteref")
      continue;
    var anchor = spans[i].getElementsByTagName("a")[0];
    var href = anchor ? anchor.getAttribute("href") : null;
    var hash = href ? href.match(/#.*/) : null;  // Because IE return full URL.
    var target = hash ? refs[hash[0]] : undefined;
    if (target === undefined)
      continue;  // Already rewritten on an earlier pass, or unknown id.
    spans[i].innerHTML =
      "[<a href='#_footnote_" + target +
      "' title='View footnote' class='footnote'>" + target + "</a>]";
  }
},

/* Schedule generation of the footnotes and, when toclevels is truthy, of
 * the table of contents.
 *
 * While the document is loading, both are regenerated every 500 ms so they
 * appear progressively; the timer is cancelled and a final pass is made
 * once the document has been parsed. If the document has already finished
 * loading when this is called, a single pass is made and no timer is set.
 */
install: function(toclevels) {
  var timerId;

  function reinstall() {
    asciidoc.footnotes();
    if (toclevels) {
      asciidoc.toc(toclevels);
    }
  }

  function reinstallAndRemoveTimer() {
    clearInterval(timerId);
    reinstall();
  }

  if (document.readyState == "complete") {
    // The load events have already fired and would never cancel the timer.
    reinstall();
    return;
  }

  timerId = setInterval(reinstall, 500);
  if (document.addEventListener) {
    document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
    // Backstop in case DOMContentLoaded fired before we were called.
    window.addEventListener("load", reinstallAndRemoveTimer, false);
  }
  else
    window.onload = reinstallAndRemoveTimer;
}

}
Junio C Hamano11ae3202018-08-20 20:15:42731asciidoc.install();
732/*]]>*/
733</script>
734</head>
735<body class="article">
736<div id="header">
737<h1>Partial Clone Design Notes</h1>
Junio C Hamanoaedeeae2023-07-18 15:55:30738<span id="revdate">2023-07-18</span>
Junio C Hamano11ae3202018-08-20 20:15:42739</div>
740<div id="content">
741<div id="preamble">
742<div class="sectionbody">
743<div class="paragraph"><p>The "Partial Clone" feature is a performance optimization for Git that
744allows Git to function without having a complete copy of the repository.
The goal of this work is to allow Git to better handle extremely large
746repositories.</p></div>
747<div class="paragraph"><p>During clone and fetch operations, Git downloads the complete contents
748and history of the repository. This includes all commits, trees, and
749blobs for the complete life of the repository. For extremely large
750repositories, clones can take hours (or days) and consume 100+GiB of disk
751space.</p></div>
752<div class="paragraph"><p>Often in these repositories there are many blobs and trees that the user
753does not need such as:</p></div>
754<div class="olist arabic"><ol class="arabic">
755<li>
756<p>
757files outside of the user&#8217;s work area in the tree. For example, in
758 a repository with 500K directories and 3.5M files in every commit,
759 we can avoid downloading many objects if the user only needs a
760 narrow "cone" of the source tree.
761</p>
762</li>
763<li>
764<p>
765large binary assets. For example, in a repository where large build
766 artifacts are checked into the tree, we can avoid downloading all
767 previous versions of these non-mergeable binary assets and only
768 download versions that are actually referenced.
769</p>
770</li>
771</ol></div>
772<div class="paragraph"><p>Partial clone allows us to avoid downloading such unneeded objects <strong>in
773advance</strong> during clone and fetch operations and thereby reduce download
774times and disk usage. Missing objects can later be "demand fetched"
775if/when needed.</p></div>
Junio C Hamanocb705392019-09-18 19:30:01776<div class="paragraph"><p>A remote that can later provide the missing objects is called a
777promisor remote, as it promises to send the objects when
Junio C Hamano8ef91f32019-12-01 22:58:27778requested. Initially Git supported only one promisor remote, the origin
Junio C Hamanocb705392019-09-18 19:30:01779remote from which the user cloned and that was configured in the
780"extensions.partialClone" config option. Later support for more than
781one promisor remote has been implemented.</p></div>
Junio C Hamano11ae3202018-08-20 20:15:42782<div class="paragraph"><p>Use of partial clone requires that the user be online and the origin
Junio C Hamanocb705392019-09-18 19:30:01783remote or other promisor remotes be available for on-demand fetching
784of missing objects. This may or may not be problematic for the user.
785For example, if the user can stay within the pre-selected subset of
786the source tree, they may not encounter any missing objects.
787Alternatively, the user could try to pre-fetch various objects if they
788know that they are going offline.</p></div>
Junio C Hamano11ae3202018-08-20 20:15:42789</div>
790</div>
791<div class="sect1">
792<h2 id="_non_goals">Non-Goals</h2>
793<div class="sectionbody">
794<div class="paragraph"><p>Partial clone is a mechanism to limit the number of blobs and trees downloaded
795<strong>within</strong> a given range of commits&#8201;&#8212;&#8201;and is therefore independent of and not
796intended to conflict with existing DAG-level mechanisms to limit the set of
797requested commits (i.e. shallow clone, single branch, or fetch <em>&lt;refspec&gt;</em>).</p></div>
798</div>
799</div>
800<div class="sect1">
801<h2 id="_design_overview">Design Overview</h2>
802<div class="sectionbody">
803<div class="paragraph"><p>Partial clone logically consists of the following parts:</p></div>
804<div class="ulist"><ul>
805<li>
806<p>
807A mechanism for the client to describe unneeded or unwanted objects to
808 the server.
809</p>
810</li>
811<li>
812<p>
813A mechanism for the server to omit such unwanted objects from packfiles
814 sent to the client.
815</p>
816</li>
817<li>
818<p>
819A mechanism for the client to gracefully handle missing objects (that
820 were previously omitted by the server).
821</p>
822</li>
823<li>
824<p>
825A mechanism for the client to backfill missing objects as needed.
826</p>
827</li>
828</ul></div>
829</div>
830</div>
831<div class="sect1">
832<h2 id="_design_details">Design Details</h2>
833<div class="sectionbody">
834<div class="ulist"><ul>
835<li>
836<p>
837A new pack-protocol capability "filter" is added to the fetch-pack and
838 upload-pack negotiation.
839</p>
840<div class="paragraph"><p>This uses the existing capability discovery mechanism.
Junio C Hamano04495a12022-08-18 21:13:08841See "filter" in <a href="../gitprotocol-pack.html">gitprotocol-pack(5)</a>.</p></div>
Junio C Hamano11ae3202018-08-20 20:15:42842</li>
843<li>
844<p>
845Clients pass a "filter-spec" to clone and fetch which is passed to the
846 server to request filtering during packfile construction.
847</p>
848<div class="paragraph"><p>There are various filters available to accommodate different situations.
849See "--filter=&lt;filter-spec&gt;" in Documentation/rev-list-options.txt.</p></div>
850</li>
851<li>
852<p>
853On the server pack-objects applies the requested filter-spec as it
854 creates "filtered" packfiles for the client.
855</p>
856<div class="paragraph"><p>These filtered packfiles are <strong>incomplete</strong> in the traditional sense because
857they may contain objects that reference objects not contained in the
858packfile and that the client doesn&#8217;t already have. For example, the
859filtered packfile may contain trees or tags that reference missing blobs
860or commits that reference missing trees.</p></div>
861</li>
862<li>
863<p>
864On the client these incomplete packfiles are marked as "promisor packfiles"
865 and treated differently by various commands.
866</p>
867</li>
868<li>
869<p>
870On the client a repository extension is added to the local config to
871 prevent older versions of git from failing mid-operation because of
872 missing objects that they cannot handle.
 See "extensions.partialClone" in Documentation/technical/repository-version.txt.
874</p>
875</li>
876</ul></div>
877</div>
878</div>
879<div class="sect1">
880<h2 id="_handling_missing_objects">Handling Missing Objects</h2>
881<div class="sectionbody">
882<div class="ulist"><ul>
883<li>
884<p>
Junio C Hamanocb705392019-09-18 19:30:01885An object may be missing due to a partial clone or fetch, or missing
886 due to repository corruption. To differentiate these cases, the
887 local repository specially indicates such filtered packfiles
888 obtained from promisor remotes as "promisor packfiles".
Junio C Hamano11ae3202018-08-20 20:15:42889</p>
890<div class="paragraph"><p>These promisor packfiles consist of a "&lt;name&gt;.promisor" file with
891arbitrary contents (like the "&lt;name&gt;.keep" files), in addition to
892their "&lt;name&gt;.pack" and "&lt;name&gt;.idx" files.</p></div>
893</li>
894<li>
895<p>
896The local repository considers a "promisor object" to be an object that
Junio C Hamanocb705392019-09-18 19:30:01897 it knows (to the best of its ability) that promisor remotes have promised
898 that they have, either because the local repository has that object in one of
Junio C Hamano11ae3202018-08-20 20:15:42899 its promisor packfiles, or because another promisor object refers to it.
900</p>
Junio C Hamanoa90214f2019-01-28 22:05:25901<div class="paragraph"><p>When Git encounters a missing object, Git can see if it is a promisor object
Junio C Hamano11ae3202018-08-20 20:15:42902and handle it appropriately. If not, Git can report a corruption.</p></div>
903<div class="paragraph"><p>This means that there is no need for the client to explicitly maintain an
904expensive-to-modify list of missing objects.[a]</p></div>
905</li>
906<li>
907<p>
908Since almost all Git code currently expects any referenced object to be
909 present locally and because we do not want to force every command to do
910 a dry-run first, a fallback mechanism is added to allow Git to attempt
Junio C Hamanocb705392019-09-18 19:30:01911 to dynamically fetch missing objects from promisor remotes.
Junio C Hamano11ae3202018-08-20 20:15:42912</p>
913<div class="paragraph"><p>When the normal object lookup fails to find an object, Git invokes
Junio C Hamanocb705392019-09-18 19:30:01914promisor_remote_get_direct() to try to get the object from a promisor
915remote and then retry the object lookup. This allows objects to be
916"faulted in" without complicated prediction algorithms.</p></div>
Junio C Hamano11ae3202018-08-20 20:15:42917<div class="paragraph"><p>For efficiency reasons, no check as to whether the missing object is
918actually a promisor object is performed.</p></div>
919<div class="paragraph"><p>Dynamic object fetching tends to be slow as objects are fetched one at
920a time.</p></div>
921</li>
922<li>
923<p>
924<code>checkout</code> (and any other command using <code>unpack-trees</code>) has been taught
925 to bulk pre-fetch all required missing blobs in a single batch.
926</p>
927</li>
928<li>
929<p>
930<code>rev-list</code> has been taught to print missing objects.
931</p>
932<div class="paragraph"><p>This can be used by other commands to bulk prefetch objects.
933For example, a "git log -p A..B" may internally want to first do
934something like "git rev-list --objects --quiet --missing=print A..B"
935and prefetch those objects in bulk.</p></div>
936</li>
937<li>
938<p>
939<code>fsck</code> has been updated to be fully aware of promisor objects.
940</p>
941</li>
942<li>
943<p>
944<code>repack</code> in GC has been updated to not touch promisor packfiles at all,
945 and to only repack other objects.
946</p>
947</li>
948<li>
949<p>
950The global variable "fetch_if_missing" is used to control whether an
951 object lookup will attempt to dynamically fetch a missing object or
952 report an error.
953</p>
954<div class="paragraph"><p>We are not happy with this global variable and would like to remove it,
955but that requires significant refactoring of the object code to pass an
Junio C Hamanocb705392019-09-18 19:30:01956additional flag.</p></div>
Junio C Hamano11ae3202018-08-20 20:15:42957</li>
958</ul></div>
959</div>
960</div>
961<div class="sect1">
962<h2 id="_fetching_missing_objects">Fetching Missing Objects</h2>
963<div class="sectionbody">
964<div class="ulist"><ul>
965<li>
966<p>
Junio C Hamano558abd22020-09-03 20:22:34967Fetching of objects is done by invoking a "git fetch" subprocess.
Junio C Hamano11ae3202018-08-20 20:15:42968</p>
Junio C Hamano11ae3202018-08-20 20:15:42969</li>
970<li>
971<p>
972The local repository sends a request with the hashes of all requested
Junio C Hamano558abd22020-09-03 20:22:34973 objects, and does not perform any packfile negotiation.
Junio C Hamano11ae3202018-08-20 20:15:42974 It then receives a packfile.
975</p>
976</li>
977<li>
978<p>
Junio C Hamano558abd22020-09-03 20:22:34979Because we are reusing the existing fetch mechanism, fetching
Junio C Hamano11ae3202018-08-20 20:15:42980 currently fetches all objects referred to by the requested objects, even
981 though they are not necessary.
982</p>
983</li>
Junio C Hamano4bea0282022-04-04 18:21:49984<li>
985<p>
986Fetching with <code>--refetch</code> will request a complete new filtered packfile from
987 the remote, which can be used to change a filter without needing to
988 dynamically fetch missing objects.
989</p>
990</li>
Junio C Hamano11ae3202018-08-20 20:15:42991</ul></div>
992</div>
993</div>
994<div class="sect1">
Junio C Hamanocb705392019-09-18 19:30:01995<h2 id="_using_many_promisor_remotes">Using many promisor remotes</h2>
996<div class="sectionbody">
997<div class="paragraph"><p>Many promisor remotes can be configured and used.</p></div>
<div class="paragraph"><p>This allows, for example, a user to have multiple geographically close
999cache servers for fetching missing blobs while continuing to do
1000filtered <code>git-fetch</code> commands from the central server.</p></div>
1001<div class="paragraph"><p>When fetching objects, promisor remotes are tried one after the other
1002until all the objects have been fetched.</p></div>
1003<div class="paragraph"><p>Remotes that are considered "promisor" remotes are those specified by
1004the following configuration variables:</p></div>
1005<div class="ulist"><ul>
1006<li>
1007<p>
1008<code>extensions.partialClone = &lt;name&gt;</code>
1009</p>
1010</li>
1011<li>
1012<p>
1013<code>remote.&lt;name&gt;.promisor = true</code>
1014</p>
1015</li>
1016<li>
1017<p>
1018<code>remote.&lt;name&gt;.partialCloneFilter = ...</code>
1019</p>
1020</li>
1021</ul></div>
1022<div class="paragraph"><p>Only one promisor remote can be configured using the
1023<code>extensions.partialClone</code> config variable. This promisor remote will
1024be the last one tried when fetching objects.</p></div>
1025<div class="paragraph"><p>We decided to make it the last one we try, because it is likely that
1026someone using many promisor remotes is doing so because the other
1027promisor remotes are better for some reason (maybe they are closer or
1028faster for some kind of objects) than the origin, and the origin is
1029likely to be the remote specified by extensions.partialClone.</p></div>
1030<div class="paragraph"><p>This justification is not very strong, but one choice had to be made,
1031and anyway the long term plan should be to make the order somehow
1032fully configurable.</p></div>
1033<div class="paragraph"><p>For now though the other promisor remotes will be tried in the order
1034they appear in the config file.</p></div>
1035</div>
1036</div>
1037<div class="sect1">
Junio C Hamano11ae3202018-08-20 20:15:421038<h2 id="_current_limitations">Current Limitations</h2>
1039<div class="sectionbody">
1040<div class="ulist"><ul>
1041<li>
1042<p>
Junio C Hamanocb705392019-09-18 19:30:011043It is not possible to specify the order in which the promisor
1044 remotes are tried in other ways than the order in which they appear
1045 in the config file.
Junio C Hamano11ae3202018-08-20 20:15:421046</p>
Junio C Hamanocb705392019-09-18 19:30:011047<div class="paragraph"><p>It is also not possible to specify an order to be used when fetching
1048from one remote and a different order when fetching from another
1049remote.</p></div>
Junio C Hamano11ae3202018-08-20 20:15:421050</li>
1051<li>
1052<p>
Junio C Hamanocb705392019-09-18 19:30:011053It is not possible to push only specific objects to a promisor
1054 remote.
1055</p>
1056<div class="paragraph"><p>It is not possible to push at the same time to multiple promisor
1057remotes in a specific order.</p></div>
1058</li>
1059<li>
1060<p>
1061Dynamic object fetching will only ask promisor remotes for missing
1062 objects. We assume that promisor remotes have a complete view of the
Junio C Hamano11ae3202018-08-20 20:15:421063 repository and can satisfy all such requests.
1064</p>
1065</li>
1066<li>
1067<p>
1068Repack essentially treats promisor and non-promisor packfiles as 2
Junio C Hamano91a411f2021-07-14 00:40:501069 distinct partitions and does not mix them.
Junio C Hamano11ae3202018-08-20 20:15:421070</p>
1071</li>
1072<li>
1073<p>
1074Dynamic object fetching invokes fetch-pack once <strong>for each item</strong>
1075 because most algorithms stumble upon a missing object and need to have
1076 it resolved before continuing their work. This may incur significant
1077 overhead&#8201;&#8212;&#8201;and multiple authentication requests&#8201;&#8212;&#8201;if many objects are
1078 needed.
1079</p>
1080</li>
1081<li>
1082<p>
1083Dynamic object fetching currently uses the existing pack protocol V0
1084 which means that each object is requested via fetch-pack. The server
1085 will send a full set of info/refs when the connection is established.
1086 If there are a large number of refs, this may incur significant overhead.
1087</p>
1088</li>
1089</ul></div>
1090</div>
1091</div>
1092<div class="sect1">
1093<h2 id="_future_work">Future Work</h2>
1094<div class="sectionbody">
1095<div class="ulist"><ul>
1096<li>
1097<p>
Junio C Hamanocb705392019-09-18 19:30:011098Improve the way to specify the order in which promisor remotes are
1099 tried.
Junio C Hamano11ae3202018-08-20 20:15:421100</p>
Junio C Hamanocb705392019-09-18 19:30:011101<div class="paragraph"><p>For example, this could allow explicitly specifying something like:
1102"When fetching from this remote, I want to use these promisor remotes
1103in this order, though, when pushing to or fetching from that remote, I want
1104to use those promisor remotes in that order."</p></div>
1105</li>
1106<li>
1107<p>
1108Allow pushing to promisor remotes.
1109</p>
1110<div class="paragraph"><p>The user might want to work in a triangular workflow with multiple
Junio C Hamano11ae3202018-08-20 20:15:421111promisor remotes that each have an incomplete view of the repository.</p></div>
1112</li>
1113<li>
1114<p>
Junio C Hamano11ae3202018-08-20 20:15:421115Allow non-pathname-based filters to make use of packfile bitmaps (when
1116 present). This was just an omission during the initial implementation.
1117</p>
1118</li>
1119<li>
1120<p>
1121Investigate use of a long-running process to dynamically fetch a series
1122 of objects, such as proposed in [5,6] to reduce process startup and
1123 overhead costs.
1124</p>
1125<div class="paragraph"><p>It would be nice if pack protocol V2 could allow that long-running
1126process to make a series of requests over a single long-running
1127connection.</p></div>
1128</li>
1129<li>
1130<p>
1131Investigate pack protocol V2 to avoid the info/refs broadcast on
1132 each connection with the server to dynamically fetch missing objects.
1133</p>
1134</li>
1135<li>
1136<p>
1137Investigate the need to handle loose promisor objects.
1138</p>
1139<div class="paragraph"><p>Objects in promisor packfiles are allowed to reference missing objects
1140that can be dynamically fetched from the server. An assumption was
1141made that loose objects are only created locally and therefore should
1142not reference a missing object. We may need to revisit that assumption
1143if, for example, we dynamically fetch a missing tree and store it as a
1144loose object rather than a single object packfile.</p></div>
1145<div class="paragraph"><p>This does not necessarily mean we need to mark loose objects as promisor;
1146it may be sufficient to relax the object lookup or is-promisor functions.</p></div>
1147</li>
1148</ul></div>
1149</div>
1150</div>
1151<div class="sect1">
1152<h2 id="_non_tasks">Non-Tasks</h2>
1153<div class="sectionbody">
1154<div class="ulist"><ul>
1155<li>
1156<p>
1157Every time the subject of "demand loading blobs" comes up it seems
1158 that someone suggests that the server be allowed to "guess" and send
1159 additional objects that may be related to the requested objects.
1160</p>
1161<div class="paragraph"><p>No work has gone into actually doing that; we&#8217;re just documenting that
1162it is a common suggestion. We&#8217;re not sure how it would work and have
1163no plans to work on it.</p></div>
1164<div class="paragraph"><p>It is valid for the server to send more objects than requested (even
1165for a dynamic object fetch), but we are not building on that.</p></div>
1166</li>
1167</ul></div>
1168</div>
1169</div>
1170<div class="sect1">
1171<h2 id="_footnotes">Footnotes</h2>
1172<div class="sectionbody">
1173<div class="paragraph"><p>[a] expensive-to-modify list of missing objects: Earlier in the design of
1174 partial clone we discussed the need for a single list of missing objects.
1175 This would essentially be a sorted linear list of OIDs that were
1176 omitted by the server during a clone or subsequent fetches.</p></div>
1177<div class="paragraph"><p>This file would need to be loaded into memory on every object lookup.
1178It would need to be read, updated, and re-written (like the .git/index)
1179on every explicit "git fetch" command <strong>and</strong> on any dynamic object fetch.</p></div>
1180<div class="paragraph"><p>The cost to read, update, and write this file could add significant
1181overhead to every command if there are many missing objects. For example,
1182if there are 100M missing blobs, this file would be at least 2GiB on disk.</p></div>
1183<div class="paragraph"><p>With the "promisor" concept, we <strong>infer</strong> a missing object based upon the
1184type of packfile that references it.</p></div>
1185</div>
1186</div>
1187<div class="sect1">
1188<h2 id="_related_links">Related Links</h2>
1189<div class="sectionbody">
1190<div class="paragraph"><p>[0] <a href="https://crbug.com/git/2">https://crbug.com/git/2</a>
1191 Bug#2: Partial Clone</p></div>
Junio C Hamano59e88242019-12-10 14:09:041192<div class="paragraph"><p>[1] <a href="https://lore.kernel.org/git/20170113155253.1644-1-benpeart@microsoft.com/">https://lore.kernel.org/git/20170113155253.1644-1-benpeart@microsoft.com/</a><br />
Junio C Hamano11ae3202018-08-20 20:15:421193 Subject: [RFC] Add support for downloading blobs on demand<br />
1194 Date: Fri, 13 Jan 2017 10:52:53 -0500</p></div>
Junio C Hamano59e88242019-12-10 14:09:041195<div class="paragraph"><p>[2] <a href="https://lore.kernel.org/git/cover.1506714999.git.jonathantanmy@google.com/">https://lore.kernel.org/git/cover.1506714999.git.jonathantanmy@google.com/</a><br />
Junio C Hamano11ae3202018-08-20 20:15:421196 Subject: [PATCH 00/18] Partial clone (from clone to lazy fetch in 18 patches)<br />
1197 Date: Fri, 29 Sep 2017 13:11:36 -0700</p></div>
Junio C Hamano59e88242019-12-10 14:09:041198<div class="paragraph"><p>[3] <a href="https://lore.kernel.org/git/20170426221346.25337-1-jonathantanmy@google.com/">https://lore.kernel.org/git/20170426221346.25337-1-jonathantanmy@google.com/</a><br />
Junio C Hamano11ae3202018-08-20 20:15:421199 Subject: Proposal for missing blob support in Git repos<br />
1200 Date: Wed, 26 Apr 2017 15:13:46 -0700</p></div>
Junio C Hamano59e88242019-12-10 14:09:041201<div class="paragraph"><p>[4] <a href="https://lore.kernel.org/git/1488999039-37631-1-git-send-email-git@jeffhostetler.com/">https://lore.kernel.org/git/1488999039-37631-1-git-send-email-git@jeffhostetler.com/</a><br />
Junio C Hamano11ae3202018-08-20 20:15:421202 Subject: [PATCH 00/10] RFC Partial Clone and Fetch<br />
1203 Date: Wed, 8 Mar 2017 18:50:29 +0000</p></div>
Junio C Hamano59e88242019-12-10 14:09:041204<div class="paragraph"><p>[5] <a href="https://lore.kernel.org/git/20170505152802.6724-1-benpeart@microsoft.com/">https://lore.kernel.org/git/20170505152802.6724-1-benpeart@microsoft.com/</a><br />
Junio C Hamano11ae3202018-08-20 20:15:421205 Subject: [PATCH v7 00/10] refactor the filter process code into a reusable module<br />
1206 Date: Fri, 5 May 2017 11:27:52 -0400</p></div>
Junio C Hamano59e88242019-12-10 14:09:041207<div class="paragraph"><p>[6] <a href="https://lore.kernel.org/git/20170714132651.170708-1-benpeart@microsoft.com/">https://lore.kernel.org/git/20170714132651.170708-1-benpeart@microsoft.com/</a><br />
Junio C Hamano11ae3202018-08-20 20:15:421208 Subject: [RFC/PATCH v2 0/1] Add support for downloading blobs on demand<br />
1209 Date: Fri, 14 Jul 2017 09:26:50 -0400</p></div>
1210</div>
1211</div>
1212</div>
1213<div id="footnotes"><hr /></div>
1214<div id="footer">
1215<div id="footer-text">
1216Last updated
Junio C Hamanoa7b2c102023-06-13 21:00:151217 2022-08-18 14:11:07 PDT
Junio C Hamano11ae3202018-08-20 20:15:421218</div>
1219</div>
1220</body>
1221</html>