| <?xml version="1.0" encoding="UTF-8"?> | |
| <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" | |
| "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"> | |
| <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"> | |
| <head> | |
| <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" /> | |
| <meta name="generator" content="AsciiDoc 8.6.10" /> | |
| <title>Partial Clone Design Notes</title> | |
| <style type="text/css"> | |
| /* Shared CSS for AsciiDoc xhtml11 and html5 backends */ | |
| /* Default font. */ | |
| body { | |
| font-family: Georgia,serif; | |
| } | |
| /* Title font. */ | |
| h1, h2, h3, h4, h5, h6, | |
| div.title, caption.title, | |
| thead, p.table.header, | |
| #toctitle, | |
| #author, #revnumber, #revdate, #revremark, | |
| #footer { | |
| font-family: Arial,Helvetica,sans-serif; | |
| } | |
| body { | |
| margin: 1em 5% 1em 5%; | |
| } | |
| a { | |
| color: blue; | |
| text-decoration: underline; | |
| } | |
| a:visited { | |
| color: fuchsia; | |
| } | |
| em { | |
| font-style: italic; | |
| color: navy; | |
| } | |
| strong { | |
| font-weight: bold; | |
| color: #083194; | |
| } | |
| h1, h2, h3, h4, h5, h6 { | |
| color: #527bbd; | |
| margin-top: 1.2em; | |
| margin-bottom: 0.5em; | |
| line-height: 1.3; | |
| } | |
| h1, h2, h3 { | |
| border-bottom: 2px solid silver; | |
| } | |
| h2 { | |
| padding-top: 0.5em; | |
| } | |
| h3 { | |
| float: left; | |
| } | |
| h3 + * { | |
| clear: left; | |
| } | |
| h5 { | |
| font-size: 1.0em; | |
| } | |
| div.sectionbody { | |
| margin-left: 0; | |
| } | |
| hr { | |
| border: 1px solid silver; | |
| } | |
| p { | |
| margin-top: 0.5em; | |
| margin-bottom: 0.5em; | |
| } | |
| ul, ol, li > p { | |
| margin-top: 0; | |
| } | |
| ul > li { color: #aaa; } | |
| ul > li > * { color: black; } | |
| .monospaced, code, pre { | |
| font-family: "Courier New", Courier, monospace; | |
| font-size: inherit; | |
| color: navy; | |
| padding: 0; | |
| margin: 0; | |
| } | |
| pre { | |
| white-space: pre-wrap; | |
| } | |
| #author { | |
| color: #527bbd; | |
| font-weight: bold; | |
| font-size: 1.1em; | |
| } | |
| #email { | |
| } | |
| #revnumber, #revdate, #revremark { | |
| } | |
| #footer { | |
| font-size: small; | |
| border-top: 2px solid silver; | |
| padding-top: 0.5em; | |
| margin-top: 4.0em; | |
| } | |
| #footer-text { | |
| float: left; | |
| padding-bottom: 0.5em; | |
| } | |
| #footer-badges { | |
| float: right; | |
| padding-bottom: 0.5em; | |
| } | |
| #preamble { | |
| margin-top: 1.5em; | |
| margin-bottom: 1.5em; | |
| } | |
| div.imageblock, div.exampleblock, div.verseblock, | |
| div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock, | |
| div.admonitionblock { | |
| margin-top: 1.0em; | |
| margin-bottom: 1.5em; | |
| } | |
| div.admonitionblock { | |
| margin-top: 2.0em; | |
| margin-bottom: 2.0em; | |
| margin-right: 10%; | |
| color: #606060; | |
| } | |
| div.content { /* Block element content. */ | |
| padding: 0; | |
| } | |
| /* Block element titles. */ | |
| div.title, caption.title { | |
| color: #527bbd; | |
| font-weight: bold; | |
| text-align: left; | |
| margin-top: 1.0em; | |
| margin-bottom: 0.5em; | |
| } | |
| div.title + * { | |
| margin-top: 0; | |
| } | |
| td div.title:first-child { | |
| margin-top: 0.0em; | |
| } | |
| div.content div.title:first-child { | |
| margin-top: 0.0em; | |
| } | |
| div.content + div.title { | |
| margin-top: 0.0em; | |
| } | |
| div.sidebarblock > div.content { | |
| background: #ffffee; | |
| border: 1px solid #dddddd; | |
| border-left: 4px solid #f0f0f0; | |
| padding: 0.5em; | |
| } | |
| div.listingblock > div.content { | |
| border: 1px solid #dddddd; | |
| border-left: 5px solid #f0f0f0; | |
| background: #f8f8f8; | |
| padding: 0.5em; | |
| } | |
| div.quoteblock, div.verseblock { | |
| padding-left: 1.0em; | |
| margin-left: 1.0em; | |
| margin-right: 10%; | |
| border-left: 5px solid #f0f0f0; | |
| color: #888; | |
| } | |
| div.quoteblock > div.attribution { | |
| padding-top: 0.5em; | |
| text-align: right; | |
| } | |
| div.verseblock > pre.content { | |
| font-family: inherit; | |
| font-size: inherit; | |
| } | |
| div.verseblock > div.attribution { | |
| padding-top: 0.75em; | |
| text-align: left; | |
| } | |
| /* DEPRECATED: Pre version 8.2.7 verse style literal block. */ | |
| div.verseblock + div.attribution { | |
| text-align: left; | |
| } | |
| div.admonitionblock .icon { | |
| vertical-align: top; | |
| font-size: 1.1em; | |
| font-weight: bold; | |
| text-decoration: underline; | |
| color: #527bbd; | |
| padding-right: 0.5em; | |
| } | |
| div.admonitionblock td.content { | |
| padding-left: 0.5em; | |
| border-left: 3px solid #dddddd; | |
| } | |
| div.exampleblock > div.content { | |
| border-left: 3px solid #dddddd; | |
| padding-left: 0.5em; | |
| } | |
| div.imageblock div.content { padding-left: 0; } | |
| span.image img { border-style: none; vertical-align: text-bottom; } | |
| a.image:visited { color: white; } | |
| dl { | |
| margin-top: 0.8em; | |
| margin-bottom: 0.8em; | |
| } | |
| dt { | |
| margin-top: 0.5em; | |
| margin-bottom: 0; | |
| font-style: normal; | |
| color: navy; | |
| } | |
| dd > *:first-child { | |
| margin-top: 0.1em; | |
| } | |
| ul, ol { | |
| list-style-position: outside; | |
| } | |
| ol.arabic { | |
| list-style-type: decimal; | |
| } | |
| ol.loweralpha { | |
| list-style-type: lower-alpha; | |
| } | |
| ol.upperalpha { | |
| list-style-type: upper-alpha; | |
| } | |
| ol.lowerroman { | |
| list-style-type: lower-roman; | |
| } | |
| ol.upperroman { | |
| list-style-type: upper-roman; | |
| } | |
| div.compact ul, div.compact ol, | |
| div.compact p, div.compact p, | |
| div.compact div, div.compact div { | |
| margin-top: 0.1em; | |
| margin-bottom: 0.1em; | |
| } | |
| tfoot { | |
| font-weight: bold; | |
| } | |
| td > div.verse { | |
| white-space: pre; | |
| } | |
| div.hdlist { | |
| margin-top: 0.8em; | |
| margin-bottom: 0.8em; | |
| } | |
| div.hdlist tr { | |
| padding-bottom: 15px; | |
| } | |
| dt.hdlist1.strong, td.hdlist1.strong { | |
| font-weight: bold; | |
| } | |
| td.hdlist1 { | |
| vertical-align: top; | |
| font-style: normal; | |
| padding-right: 0.8em; | |
| color: navy; | |
| } | |
| td.hdlist2 { | |
| vertical-align: top; | |
| } | |
| div.hdlist.compact tr { | |
| margin: 0; | |
| padding-bottom: 0; | |
| } | |
| .comment { | |
| background: yellow; | |
| } | |
| .footnote, .footnoteref { | |
| font-size: 0.8em; | |
| } | |
| span.footnote, span.footnoteref { | |
| vertical-align: super; | |
| } | |
| #footnotes { | |
| margin: 20px 0 20px 0; | |
| padding: 7px 0 0 0; | |
| } | |
| #footnotes div.footnote { | |
| margin: 0 0 5px 0; | |
| } | |
| #footnotes hr { | |
| border: none; | |
| border-top: 1px solid silver; | |
| height: 1px; | |
| text-align: left; | |
| margin-left: 0; | |
| width: 20%; | |
| min-width: 100px; | |
| } | |
| div.colist td { | |
| padding-right: 0.5em; | |
| padding-bottom: 0.3em; | |
| vertical-align: top; | |
| } | |
| div.colist td img { | |
| margin-top: 0.3em; | |
| } | |
| @media print { | |
| #footer-badges { display: none; } | |
| } | |
| #toc { | |
| margin-bottom: 2.5em; | |
| } | |
| #toctitle { | |
| color: #527bbd; | |
| font-size: 1.1em; | |
| font-weight: bold; | |
| margin-top: 1.0em; | |
| margin-bottom: 0.1em; | |
| } | |
| div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 { | |
| margin-top: 0; | |
| margin-bottom: 0; | |
| } | |
| div.toclevel2 { | |
| margin-left: 2em; | |
| font-size: 0.9em; | |
| } | |
| div.toclevel3 { | |
| margin-left: 4em; | |
| font-size: 0.9em; | |
| } | |
| div.toclevel4 { | |
| margin-left: 6em; | |
| font-size: 0.9em; | |
| } | |
| span.aqua { color: aqua; } | |
| span.black { color: black; } | |
| span.blue { color: blue; } | |
| span.fuchsia { color: fuchsia; } | |
| span.gray { color: gray; } | |
| span.green { color: green; } | |
| span.lime { color: lime; } | |
| span.maroon { color: maroon; } | |
| span.navy { color: navy; } | |
| span.olive { color: olive; } | |
| span.purple { color: purple; } | |
| span.red { color: red; } | |
| span.silver { color: silver; } | |
| span.teal { color: teal; } | |
| span.white { color: white; } | |
| span.yellow { color: yellow; } | |
| span.aqua-background { background: aqua; } | |
| span.black-background { background: black; } | |
| span.blue-background { background: blue; } | |
| span.fuchsia-background { background: fuchsia; } | |
| span.gray-background { background: gray; } | |
| span.green-background { background: green; } | |
| span.lime-background { background: lime; } | |
| span.maroon-background { background: maroon; } | |
| span.navy-background { background: navy; } | |
| span.olive-background { background: olive; } | |
| span.purple-background { background: purple; } | |
| span.red-background { background: red; } | |
| span.silver-background { background: silver; } | |
| span.teal-background { background: teal; } | |
| span.white-background { background: white; } | |
| span.yellow-background { background: yellow; } | |
| span.big { font-size: 2em; } | |
| span.small { font-size: 0.6em; } | |
| span.underline { text-decoration: underline; } | |
| span.overline { text-decoration: overline; } | |
| span.line-through { text-decoration: line-through; } | |
| div.unbreakable { page-break-inside: avoid; } | |
| /* | |
| * xhtml11 specific | |
| * | |
| * */ | |
| div.tableblock { | |
| margin-top: 1.0em; | |
| margin-bottom: 1.5em; | |
| } | |
| div.tableblock > table { | |
| border: 3px solid #527bbd; | |
| } | |
| thead, p.table.header { | |
| font-weight: bold; | |
| color: #527bbd; | |
| } | |
| p.table { | |
| margin-top: 0; | |
| } | |
| /* Because the table frame attribute is overridden by CSS in most browsers. */ | |
| div.tableblock > table[frame="void"] { | |
| border-style: none; | |
| } | |
| div.tableblock > table[frame="hsides"] { | |
| border-left-style: none; | |
| border-right-style: none; | |
| } | |
| div.tableblock > table[frame="vsides"] { | |
| border-top-style: none; | |
| border-bottom-style: none; | |
| } | |
| /* | |
| * html5 specific | |
| * | |
| * */ | |
| table.tableblock { | |
| margin-top: 1.0em; | |
| margin-bottom: 1.5em; | |
| } | |
| thead, p.tableblock.header { | |
| font-weight: bold; | |
| color: #527bbd; | |
| } | |
| p.tableblock { | |
| margin-top: 0; | |
| } | |
| table.tableblock { | |
| border-width: 3px; | |
| border-spacing: 0px; | |
| border-style: solid; | |
| border-color: #527bbd; | |
| border-collapse: collapse; | |
| } | |
| th.tableblock, td.tableblock { | |
| border-width: 1px; | |
| padding: 4px; | |
| border-style: solid; | |
| border-color: #527bbd; | |
| } | |
| table.tableblock.frame-topbot { | |
| border-left-style: hidden; | |
| border-right-style: hidden; | |
| } | |
| table.tableblock.frame-sides { | |
| border-top-style: hidden; | |
| border-bottom-style: hidden; | |
| } | |
| table.tableblock.frame-none { | |
| border-style: hidden; | |
| } | |
| th.tableblock.halign-left, td.tableblock.halign-left { | |
| text-align: left; | |
| } | |
| th.tableblock.halign-center, td.tableblock.halign-center { | |
| text-align: center; | |
| } | |
| th.tableblock.halign-right, td.tableblock.halign-right { | |
| text-align: right; | |
| } | |
| th.tableblock.valign-top, td.tableblock.valign-top { | |
| vertical-align: top; | |
| } | |
| th.tableblock.valign-middle, td.tableblock.valign-middle { | |
| vertical-align: middle; | |
| } | |
| th.tableblock.valign-bottom, td.tableblock.valign-bottom { | |
| vertical-align: bottom; | |
| } | |
| /* | |
| * manpage specific | |
| * | |
| * */ | |
| body.manpage h1 { | |
| padding-top: 0.5em; | |
| padding-bottom: 0.5em; | |
| border-top: 2px solid silver; | |
| border-bottom: 2px solid silver; | |
| } | |
| body.manpage h2 { | |
| border-style: none; | |
| } | |
| body.manpage div.sectionbody { | |
| margin-left: 3em; | |
| } | |
| @media print { | |
| body.manpage div#toc { display: none; } | |
| } | |
| </style> | |
| <script type="text/javascript"> | |
| /*<+'])'); | |
| // Function that scans the DOM tree for header elements (the DOM2 | |
| // nodeIterator API would be a better technique but not supported by all | |
| // browsers). | |
| var iterate = function (el) { | |
| for (var i = el.firstChild; i != null; i = i.nextSibling) { | |
| if (i.nodeType == 1 /* Node.ELEMENT_NODE */) { | |
| var mo = re.exec(i.tagName); | |
| if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") { | |
| result[result.length] = new TocEntry(i, getText(i), mo[1]-1); | |
| } | |
| iterate(i); | |
| } | |
| } | |
| } | |
| iterate(el); | |
| return result; | |
| } | |
| var toc = document.getElementById("toc"); | |
| if (!toc) { | |
| return; | |
| } | |
| // Delete existing TOC entries in case we're reloading the TOC. | |
| var tocEntriesToRemove = []; | |
| var i; | |
| for (i = 0; i < toc.childNodes.length; i++) { | |
| var entry = toc.childNodes[i]; | |
| if (entry.nodeName.toLowerCase() == 'div' | |
| && entry.getAttribute("class") | |
| && entry.getAttribute("class").match(/^toclevel/)) | |
| tocEntriesToRemove.push(entry); | |
| } | |
| for (i = 0; i < tocEntriesToRemove.length; i++) { | |
| toc.removeChild(tocEntriesToRemove[i]); | |
| } | |
| // Rebuild TOC entries. | |
| var entries = tocEntries(document.getElementById("content"), toclevels); | |
| for (var i = 0; i < entries.length; ++i) { | |
| var entry = entries[i]; | |
| if (entry.element.id == "") | |
| entry.element.id = "_toc_" + i; | |
| var a = document.createElement("a"); | |
| a.href = "#" + entry.element.id; | |
| a.appendChild(document.createTextNode(entry.text)); | |
| var div = document.createElement("div"); | |
| div.appendChild(a); | |
| div.className = "toclevel" + entry.toclevel; | |
| toc.appendChild(div); | |
| } | |
| if (entries.length == 0) | |
| toc.parentNode.removeChild(toc); | |
| }, | |
| ///////////////////////////////////////////////////////////////////// | |
| // Footnotes generator | |
| ///////////////////////////////////////////////////////////////////// | |
| /* Based on footnote generation code from: | |
| * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html | |
| * | |
| * Regenerates the footnote list held by the element with id "footnotes"; | |
| * returns immediately when that element is absent. Existing div.footnote | |
| * children are removed first so the function can safely be run again. | |
| * | |
| * Every span with class "footnote" under #content is numbered in the order | |
| * it is encountered. On the first run its bracketed text is captured, stored | |
| * in the span's data-note attribute and replaced by a numbered link; later | |
| * runs read the text back from data-note. A matching numbered entry is | |
| * appended to the footnotes element, and the span's id (if any) is recorded | |
| * in refs as "#" + id -> number. | |
| * | |
| * Spans with class "footnoteref" are then rewritten to link to the number | |
| * recorded for the fragment of their first anchor's href. If no footnote | |
| * spans were found, the footnotes element is removed from the document. | |
| */ | |
| footnotes: function () { | |
| // Delete existing footnote entries in case we're reloading the footnotes. | |
| var i; | |
| var noteholder = document.getElementById("footnotes"); | |
| if (!noteholder) { | |
| return; | |
| } | |
| var entriesToRemove = []; | |
| for (i = 0; i < noteholder.childNodes.length; i++) { | |
| var entry = noteholder.childNodes[i]; | |
| if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote") | |
| entriesToRemove.push(entry); | |
| } | |
| for (i = 0; i < entriesToRemove.length; i++) { | |
| noteholder.removeChild(entriesToRemove[i]); | |
| } | |
| // Rebuild footnote entries from the footnote spans found under #content. | |
| var cont = document.getElementById("content"); | |
| var spans = cont.getElementsByTagName("span"); | |
| var refs = {}; | |
| var n = 0; | |
| for (i=0; i<spans.length; i++) { | |
| if (spans[i].className == "footnote") { | |
| n++; | |
| var note = spans[i].getAttribute("data-note"); | |
| if (!note) { | |
| // Use [\s\S] in place of . so the match can span multiple lines | |
| // without relying on an s (dotall) regex flag. | |
| note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1]; | |
| spans[i].innerHTML = | |
| "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n + | |
| "' title='View footnote' class='footnote'>" + n + "</a>]"; | |
| spans[i].setAttribute("data-note", note); | |
| } | |
| noteholder.innerHTML += | |
| "<div class='footnote' id='_footnote_" + n + "'>" + | |
| "<a href='#_footnoteref_" + n + "' title='Return to text'>" + | |
| n + "</a>. " + note + "</div>"; | |
| var id =spans[i].getAttribute("id"); | |
| if (id != null) refs["#"+id] = n; | |
| } | |
| } | |
| if (n == 0) | |
| noteholder.parentNode.removeChild(noteholder); | |
| else { | |
| // Process footnoterefs: link each one to the number recorded in refs. | |
| for (i=0; i<spans.length; i++) { | |
| if (spans[i].className == "footnoteref") { | |
| var href = spans[i].getElementsByTagName("a")[0].getAttribute("href"); | |
| href = href.match(/#.*/)[0]; // Because IE returns the full URL. | |
| n = refs[href]; | |
| spans[i].innerHTML = | |
| "[<a href='#_footnote_" + n + | |
| "' title='View footnote' class='footnote'>" + n + "</a>]"; | |
| } | |
| } | |
| } | |
| }, | |
| install: function(toclevels) { | |
| var timerId; | |
| function reinstall() { | |
| asciidoc.footnotes(); | |
| if (toclevels) { | |
| asciidoc.toc(toclevels); | |
| } | |
| } | |
| function reinstallAndRemoveTimer() { | |
| clearInterval(timerId); | |
| reinstall(); | |
| } | |
| timerId = setInterval(reinstall, 500); | |
| if (document.addEventListener) | |
| document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false); | |
| else | |
| window.onload = reinstallAndRemoveTimer; | |
| } | |
| } | |
| asciidoc.install(); | |
| /*]]>*/ | |
| </script> | |
| </head> | |
| <body class="article"> | |
| <div id="header"> | |
| <h1>Partial Clone Design Notes</h1> | |
| </div> | |
| <div id="content"> | |
| <div id="preamble"> | |
| <div class="sectionbody"> | |
| <div class="paragraph"><p>The "Partial Clone" feature is a performance optimization for Git that | |
| allows Git to function without having a complete copy of the repository. | |
| The goal of this work is to allow Git to better handle extremely large | |
| repositories.</p></div> | |
| <div class="paragraph"><p>During clone and fetch operations, Git downloads the complete contents | |
| and history of the repository. This includes all commits, trees, and | |
| blobs for the complete life of the repository. For extremely large | |
| repositories, clones can take hours (or days) and consume 100+GiB of disk | |
| space.</p></div> | |
| <div class="paragraph"><p>Often in these repositories there are many blobs and trees that the user | |
| does not need such as:</p></div> | |
| <div class="olist arabic"><ol class="arabic"> | |
| <li> | |
| <p> | |
| files outside of the user’s work area in the tree. For example, in | |
| a repository with 500K directories and 3.5M files in every commit, | |
| we can avoid downloading many objects if the user only needs a | |
| narrow "cone" of the source tree. | |
| </p> | |
| </li> | |
| <li> | |
| <p> | |
| large binary assets. For example, in a repository where large build | |
| artifacts are checked into the tree, we can avoid downloading all | |
| previous versions of these non-mergeable binary assets and only | |
| download versions that are actually referenced. | |
| </p> | |
| </li> | |
| </ol></div> | |
| <div class="paragraph"><p>Partial clone allows us to avoid downloading such unneeded objects <strong>in | |
| advance</strong> during clone and fetch operations and thereby reduce download | |
| times and disk usage. Missing objects can later be "demand fetched" | |
| if/when needed.</p></div> | |
| <div class="paragraph"><p>Use of partial clone requires that the user be online and the origin | |
| remote be available for on-demand fetching of missing objects. This may | |
| or may not be problematic for the user. For example, if the user can | |
| stay within the pre-selected subset of the source tree, they may not | |
| encounter any missing objects. Alternatively, the user could try to | |
| pre-fetch various objects if they know that they are going offline.</p></div> | |
| </div> | |
| </div> | |
| <div class="sect1"> | |
| <h2 id="_non_goals">Non-Goals</h2> | |
| <div class="sectionbody"> | |
| <div class="paragraph"><p>Partial clone is a mechanism to limit the number of blobs and trees downloaded | |
| <strong>within</strong> a given range of commits — and is therefore independent of and not | |
| intended to conflict with existing DAG-level mechanisms to limit the set of | |
| requested commits (i.e. shallow clone, single branch, or fetch <em>&lt;refspec&gt;</em>).</p></div> | |
| </div> | |
| </div> | |
| <div class="sect1"> | |
| <h2 id="_design_overview">Design Overview</h2> | |
| <div class="sectionbody"> | |
| <div class="paragraph"><p>Partial clone logically consists of the following parts:</p></div> | |
| <div class="ulist"><ul> | |
| <li> | |
| <p> | |
| A mechanism for the client to describe unneeded or unwanted objects to | |
| the server. | |
| </p> | |
| </li> | |
| <li> | |
| <p> | |
| A mechanism for the server to omit such unwanted objects from packfiles | |
| sent to the client. | |
| </p> | |
| </li> | |
| <li> | |
| <p> | |
| A mechanism for the client to gracefully handle missing objects (that | |
| were previously omitted by the server). | |
| </p> | |
| </li> | |
| <li> | |
| <p> | |
| A mechanism for the client to backfill missing objects as needed. | |
| </p> | |
| </li> | |
| </ul></div> | |
| </div> | |
| </div> | |
| <div class="sect1"> | |
| <h2 id="_design_details">Design Details</h2> | |
| <div class="sectionbody"> | |
| <div class="ulist"><ul> | |
| <li> | |
| <p> | |
| A new pack-protocol capability "filter" is added to the fetch-pack and | |
| upload-pack negotiation. | |
| </p> | |
| <div class="paragraph"><p>This uses the existing capability discovery mechanism. | |
| See "filter" in Documentation/technical/pack-protocol.txt.</p></div> | |
| </li> | |
| <li> | |
| <p> | |
| Clients pass a "filter-spec" to clone and fetch which is passed to the | |
| server to request filtering during packfile construction. | |
| </p> | |
| <div class="paragraph"><p>There are various filters available to accommodate different situations. | |
| See "--filter=&lt;filter-spec&gt;" in Documentation/rev-list-options.txt.</p></div> | |
| </li> | |
| <li> | |
| <p> | |
| On the server pack-objects applies the requested filter-spec as it | |
| creates "filtered" packfiles for the client. | |
| </p> | |
| <div class="paragraph"><p>These filtered packfiles are <strong>incomplete</strong> in the traditional sense because | |
| they may contain objects that reference objects not contained in the | |
| packfile and that the client doesn’t already have. For example, the | |
| filtered packfile may contain trees or tags that reference missing blobs | |
| or commits that reference missing trees.</p></div> | |
| </li> | |
| <li> | |
| <p> | |
| On the client these incomplete packfiles are marked as "promisor packfiles" | |
| and treated differently by various commands. | |
| </p> | |
| </li> | |
| <li> | |
| <p> | |
| On the client a repository extension is added to the local config to | |
| prevent older versions of git from failing mid-operation because of | |
| missing objects that they cannot handle. | |
| See "extensions.partialClone" in Documentation/technical/repository-version.txt. | |
| </p> | |
| </li> | |
| </ul></div> | |
| </div> | |
| </div> | |
| <div class="sect1"> | |
| <h2 id="_handling_missing_objects">Handling Missing Objects</h2> | |
| <div class="sectionbody"> | |
| <div class="ulist"><ul> | |
| <li> | |
| <p> | |
| An object may be missing due to a partial clone or fetch, or missing due | |
| to repository corruption. To differentiate these cases, the local | |
| repository specially indicates such filtered packfiles obtained from the | |
| promisor remote as "promisor packfiles". | |
| </p> | |
| <div class="paragraph"><p>These promisor packfiles consist of a "&lt;name&gt;.promisor" file with | |
| arbitrary contents (like the "&lt;name&gt;.keep" files), in addition to | |
| their "&lt;name&gt;.pack" and "&lt;name&gt;.idx" files.</p></div> | |
| </li> | |
| <li> | |
| <p> | |
| The local repository considers a "promisor object" to be an object that | |
| it knows (to the best of its ability) that the promisor remote has promised | |
| that it has, either because the local repository has that object in one of | |
| its promisor packfiles, or because another promisor object refers to it. | |
| </p> | |
| <div class="paragraph"><p>When Git encounters a missing object, Git can see if it is a promisor object | |
| and handle it appropriately. If not, Git can report a corruption.</p></div> | |
| <div class="paragraph"><p>This means that there is no need for the client to explicitly maintain an | |
| expensive-to-modify list of missing objects.[a]</p></div> | |
| </li> | |
| <li> | |
| <p> | |
| Since almost all Git code currently expects any referenced object to be | |
| present locally and because we do not want to force every command to do | |
| a dry-run first, a fallback mechanism is added to allow Git to attempt | |
| to dynamically fetch missing objects from the promisor remote. | |
| </p> | |
| <div class="paragraph"><p>When the normal object lookup fails to find an object, Git invokes | |
| fetch-object to try to get the object from the server and then retry | |
| the object lookup. This allows objects to be "faulted in" without | |
| complicated prediction algorithms.</p></div> | |
| <div class="paragraph"><p>For efficiency reasons, no check as to whether the missing object is | |
| actually a promisor object is performed.</p></div> | |
| <div class="paragraph"><p>Dynamic object fetching tends to be slow as objects are fetched one at | |
| a time.</p></div> | |
| </li> | |
| <li> | |
| <p> | |
| <code>checkout</code> (and any other command using <code>unpack-trees</code>) has been taught | |
| to bulk pre-fetch all required missing blobs in a single batch. | |
| </p> | |
| </li> | |
| <li> | |
| <p> | |
| <code>rev-list</code> has been taught to print missing objects. | |
| </p> | |
| <div class="paragraph"><p>This can be used by other commands to bulk prefetch objects. | |
| For example, a "git log -p A..B" may internally want to first do | |
| something like "git rev-list --objects --quiet --missing=print A..B" | |
| and prefetch those objects in bulk.</p></div> | |
| </li> | |
| <li> | |
| <p> | |
| <code>fsck</code> has been updated to be fully aware of promisor objects. | |
| </p> | |
| </li> | |
| <li> | |
| <p> | |
| <code>repack</code> in GC has been updated to not touch promisor packfiles at all, | |
| and to only repack other objects. | |
| </p> | |
| </li> | |
| <li> | |
| <p> | |
| The global variable "fetch_if_missing" is used to control whether an | |
| object lookup will attempt to dynamically fetch a missing object or | |
| report an error. | |
| </p> | |
| <div class="paragraph"><p>We are not happy with this global variable and would like to remove it, | |
| but that requires significant refactoring of the object code to pass an | |
| additional flag. We hope that concurrent efforts to add an ODB API can | |
| encompass this.</p></div> | |
| </li> | |
| </ul></div> | |
| </div> | |
| </div> | |
| <div class="sect1"> | |
| <h2 id="_fetching_missing_objects">Fetching Missing Objects</h2> | |
| <div class="sectionbody"> | |
| <div class="ulist"><ul> | |
| <li> | |
| <p> | |
| Fetching of objects is done using the existing transport mechanism using | |
| transport_fetch_refs(), setting a new transport option | |
| TRANS_OPT_NO_DEPENDENTS to indicate that only the objects themselves are | |
| desired, not any object that they refer to. | |
| </p> | |
| <div class="paragraph"><p>Because some transports invoke fetch_pack() in the same process, fetch_pack() | |
| has been updated to not use any object flags when the corresponding argument | |
| (no_dependents) is set.</p></div> | |
| </li> | |
| <li> | |
| <p> | |
| The local repository sends a request with the hashes of all requested | |
| objects as "want" lines, and does not perform any packfile negotiation. | |
| It then receives a packfile. | |
| </p> | |
| </li> | |
| <li> | |
| <p> | |
| Because we are reusing the existing fetch-pack mechanism, fetching | |
| currently fetches all objects referred to by the requested objects, even | |
| though they are not necessary. | |
| </p> | |
| </li> | |
| </ul></div> | |
| </div> | |
| </div> | |
| <div class="sect1"> | |
| <h2 id="_current_limitations">Current Limitations</h2> | |
| <div class="sectionbody"> | |
| <div class="ulist"><ul> | |
| <li> | |
| <p> | |
| The remote used for a partial clone (or the first partial fetch | |
| following a regular clone) is marked as the "promisor remote". | |
| </p> | |
| <div class="paragraph"><p>We are currently limited to a single promisor remote and only that | |
| remote may be used for subsequent partial fetches.</p></div> | |
| <div class="paragraph"><p>We accept this limitation because we believe initial users of this | |
| feature will be using it on repositories with a strong single central | |
| server.</p></div> | |
| </li> | |
| <li> | |
| <p> | |
| Dynamic object fetching will only ask the promisor remote for missing | |
| objects. We assume that the promisor remote has a complete view of the | |
| repository and can satisfy all such requests. | |
| </p> | |
| </li> | |
| <li> | |
| <p> | |
| Repack essentially treats promisor and non-promisor packfiles as 2 | |
| distinct partitions and does not mix them. Repack currently only works | |
| on non-promisor packfiles and loose objects. | |
| </p> | |
| </li> | |
| <li> | |
| <p> | |
| Dynamic object fetching invokes fetch-pack once <strong>for each item</strong> | |
| because most algorithms stumble upon a missing object and need to have | |
| it resolved before continuing their work. This may incur significant | |
| overhead — and multiple authentication requests — if many objects are | |
| needed. | |
| </p> | |
| </li> | |
| <li> | |
| <p> | |
| Dynamic object fetching currently uses the existing pack protocol V0 | |
| which means that each object is requested via fetch-pack. The server | |
| will send a full set of info/refs when the connection is established. | |
| If there are a large number of refs, this may incur significant overhead. | |
| </p> | |
| </li> | |
| </ul></div> | |
| </div> | |
| </div> | |
| <div class="sect1"> | |
| <h2 id="_future_work">Future Work</h2> | |
| <div class="sectionbody"> | |
| <div class="ulist"><ul> | |
| <li> | |
| <p> | |
| Allow more than one promisor remote and define a strategy for fetching | |
| missing objects from specific promisor remotes or of iterating over the | |
| set of promisor remotes until a missing object is found. | |
| </p> | |
| <div class="paragraph"><p>A user might want to have multiple geographically-close cache servers | |
| for fetching missing blobs while continuing to do filtered <code>git-fetch</code> | |
| commands from the central server, for example.</p></div> | |
| <div class="paragraph"><p>Or the user might want to work in a triangular work flow with multiple | |
| promisor remotes that each have an incomplete view of the repository.</p></div> | |
| </li> | |
| <li> | |
| <p> | |
| Allow repack to work on promisor packfiles (while keeping them distinct | |
| from non-promisor packfiles). | |
| </p> | |
| </li> | |
| <li> | |
| <p> | |
| Allow non-pathname-based filters to make use of packfile bitmaps (when | |
| present). This was just an omission during the initial implementation. | |
| </p> | |
| </li> | |
| <li> | |
| <p> | |
| Investigate use of a long-running process to dynamically fetch a series | |
| of objects, such as proposed in [5,6] to reduce process startup and | |
| overhead costs. | |
| </p> | |
| <div class="paragraph"><p>It would be nice if pack protocol V2 could allow that long-running | |
| process to make a series of requests over a single long-running | |
| connection.</p></div> | |
| </li> | |
| <li> | |
| <p> | |
| Investigate pack protocol V2 to avoid the info/refs broadcast on | |
| each connection with the server to dynamically fetch missing objects. | |
| </p> | |
| </li> | |
| <li> | |
| <p> | |
| Investigate the need to handle loose promisor objects. | |
| </p> | |
| <div class="paragraph"><p>Objects in promisor packfiles are allowed to reference missing objects | |
| that can be dynamically fetched from the server. An assumption was | |
| made that loose objects are only created locally and therefore should | |
| not reference a missing object. We may need to revisit that assumption | |
| if, for example, we dynamically fetch a missing tree and store it as a | |
| loose object rather than a single object packfile.</p></div> | |
| <div class="paragraph"><p>This does not necessarily mean we need to mark loose objects as promisor; | |
| it may be sufficient to relax the object lookup or is-promisor functions.</p></div> | |
| </li> | |
| </ul></div> | |
| </div> | |
| </div> | |
| <div class="sect1"> | |
| <h2 id="_non_tasks">Non-Tasks</h2> | |
| <div class="sectionbody"> | |
| <div class="ulist"><ul> | |
| <li> | |
| <p> | |
| Every time the subject of "demand loading blobs" comes up it seems | |
| that someone suggests that the server be allowed to "guess" and send | |
| additional objects that may be related to the requested objects. | |
| </p> | |
| <div class="paragraph"><p>No work has gone into actually doing that; we’re just documenting that | |
| it is a common suggestion. We’re not sure how it would work and have | |
| no plans to work on it.</p></div> | |
| <div class="paragraph"><p>It is valid for the server to send more objects than requested (even | |
| for a dynamic object fetch), but we are not building on that.</p></div> | |
| </li> | |
| </ul></div> | |
| </div> | |
| </div> | |
| <div class="sect1"> | |
| <h2 id="_footnotes">Footnotes</h2> | |
| <div class="sectionbody"> | |
| <div class="paragraph"><p>[a] expensive-to-modify list of missing objects: Earlier in the design of | |
| partial clone we discussed the need for a single list of missing objects. | |
| This would essentially be a sorted linear list of OIDs that were | |
| omitted by the server during a clone or subsequent fetches.</p></div> | |
| <div class="paragraph"><p>This file would need to be loaded into memory on every object lookup. | |
| It would need to be read, updated, and re-written (like the .git/index) | |
| on every explicit "git fetch" command <strong>and</strong> on any dynamic object fetch.</p></div> | |
| <div class="paragraph"><p>The cost to read, update, and write this file could add significant | |
| overhead to every command if there are many missing objects. For example, | |
| if there are 100M missing blobs, this file would be at least 2GiB on disk.</p></div> | |
| <div class="paragraph"><p>With the "promisor" concept, we <strong>infer</strong> a missing object based upon the | |
| type of packfile that references it.</p></div> | |
| </div> | |
| </div> | |
| <div class="sect1"> | |
| <h2 id="_related_links">Related Links</h2> | |
| <div class="sectionbody"> | |
| <div class="paragraph"><p>[0] <a href="https://crbug.com/git/2">https://crbug.com/git/2</a><br /> | |
| Bug#2: Partial Clone</p></div> | |
| <div class="paragraph"><p>[1] <a href="https://public-inbox.org/git/20170113155253.1644-1-benpeart@microsoft.com/">https://public-inbox.org/git/20170113155253.1644-1-benpeart@microsoft.com/</a><br /> | |
| Subject: [RFC] Add support for downloading blobs on demand<br /> | |
| Date: Fri, 13 Jan 2017 10:52:53 -0500</p></div> | |
| <div class="paragraph"><p>[2] <a href="https://public-inbox.org/git/cover.1506714999.git.jonathantanmy@google.com/">https://public-inbox.org/git/cover.1506714999.git.jonathantanmy@google.com/</a><br /> | |
| Subject: [PATCH 00/18] Partial clone (from clone to lazy fetch in 18 patches)<br /> | |
| Date: Fri, 29 Sep 2017 13:11:36 -0700</p></div> | |
| <div class="paragraph"><p>[3] <a href="https://public-inbox.org/git/20170426221346.25337-1-jonathantanmy@google.com/">https://public-inbox.org/git/20170426221346.25337-1-jonathantanmy@google.com/</a><br /> | |
| Subject: Proposal for missing blob support in Git repos<br /> | |
| Date: Wed, 26 Apr 2017 15:13:46 -0700</p></div> | |
| <div class="paragraph"><p>[4] <a href="https://public-inbox.org/git/1488999039-37631-1-git-send-email-git@jeffhostetler.com/">https://public-inbox.org/git/1488999039-37631-1-git-send-email-git@jeffhostetler.com/</a><br /> | |
| Subject: [PATCH 00/10] RFC Partial Clone and Fetch<br /> | |
| Date: Wed, 8 Mar 2017 18:50:29 +0000</p></div> | |
| <div class="paragraph"><p>[5] <a href="https://public-inbox.org/git/20170505152802.6724-1-benpeart@microsoft.com/">https://public-inbox.org/git/20170505152802.6724-1-benpeart@microsoft.com/</a><br /> | |
| Subject: [PATCH v7 00/10] refactor the filter process code into a reusable module<br /> | |
| Date: Fri, 5 May 2017 11:27:52 -0400</p></div> | |
| <div class="paragraph"><p>[6] <a href="https://public-inbox.org/git/20170714132651.170708-1-benpeart@microsoft.com/">https://public-inbox.org/git/20170714132651.170708-1-benpeart@microsoft.com/</a><br /> | |
| Subject: [RFC/PATCH v2 0/1] Add support for downloading blobs on demand<br /> | |
| Date: Fri, 14 Jul 2017 09:26:50 -0400</p></div> | |
| </div> | |
| </div> | |
| </div> | |
| <div id="footnotes"><hr /></div> | |
| <div id="footer"> | |
| <div id="footer-text"> | |
| Last updated | |
| 2019-01-28 14:04:34 PST | |
| </div> | |
| </div> | |
| </body> | |
| </html> |