gitgitgadget · derrickstolee · Oct 21, 2022 · Oct 24, 2022 · Oct 10, 2022 · Nov 3, 2022
diff --git a/Documentation/config.txt b/Documentation/config.txt
@@ -493,6 +493,8 @@ include::config/rebase.txt[]
 
 include::config/receive.txt[]
 
+include::config/refs.txt[]
+
 include::config/remote.txt[]
 
 include::config/remotes.txt[]

diff --git a/Documentation/config/extensions.txt b/Documentation/config/extensions.txt
@@ -7,6 +7,69 @@ Note that this setting should only be set by linkgit:git-init[1] or
 linkgit:git-clone[1]. Trying to change it after initialization will not
 work and will produce hard-to-diagnose issues.
 
+extensions.refFormat::
+Specify the reference storage mechanisms used by the repository as a
+multi-valued list. The acceptable values are `files`, `packed`, and `packed-v2`.
+If not specified, the list of `files` and `packed` is assumed. It
+is an error to specify this key unless `core.repositoryFormatVersion`
+is 1.
++
+As new ref formats are added, Git commands may modify this list before and
+after upgrading the on-disk reference storage files. The specific values
+indicate the existence of different layers:
++
+--
+`files`;;
+When present, references may be stored as "loose" reference files
+in the `$GIT_DIR/refs/` directory. The name of the reference
+corresponds to the filename after `$GIT_DIR` and the file contains
+an object ID as a hexadecimal string. If a loose reference file
+exists, then its value takes precedence over all other formats.
+
+`packed`;;
+When present, references may be stored as a group in a
+`packed-refs` file in its version 1 format. When grouped with
+`files` or provided on its own, this file is located at
+`$GIT_DIR/packed-refs`. This file contains a list of distinct
+reference names, paired with their object IDs. When combined with
+`files`, the `packed` format will only be used to group multiple
+loose reference files upon request via the `git pack-refs` command or
+via the `pack-refs` maintenance task.
+
+`packed-v2`;;
+When present, references may be stored as a group in a
+`packed-refs` file in its version 2 format. This file is in the
+same position and interacts with loose refs the same as when the
+`packed` value exists. Both `packed` and `packed-v2` must exist to
+upgrade an existing `packed-refs` file from version 1 to version 2
+or to downgrade from version 2 to version 1. When both are
+present, the `refs.packedRefsVersion` config value indicates which
+file format version is used during writes, but both versions are
+understood when reading the file.
+--
++
+The following combinations are supported by this version of Git:
++
+--
+`files` and (`packed` and/or `packed-v2`);;
+This set of values indicates that references are stored both as
+loose reference files and in the `packed-refs` file. Loose
+references are preferred, and the `packed-refs` file is updated
+only when deleting a reference that is stored in the `packed-refs`
+file or during a `git pack-refs` command.
++
+The presence of `packed` and `packed-v2` specifies whether the `packed-refs`
+file is allowed to be in its v1 or v2 formats, respectively. When only one
+is present, Git will refuse to read a `packed-refs` file that does not
+match the expected format. When both are present, the `refs.packedRefsVersion`
+config option indicates which file format is used during writes.
+
+`files`;;
+When only this value is present, Git will ignore the `packed-refs`
+file and refuse to write one during `git pack-refs`. All references
+will be read from and written to loose reference files.
+--
+
 extensions.worktreeConfig::
 If enabled, then worktrees will load config settings from the
 `$GIT_DIR/config.worktree` file in addition to the
@@ -21,10 +84,15 @@ When enabling `extensions.worktreeConfig`, you must be careful to move
 certain values from the common config file to the main working tree's
 `config.worktree` file, if present:
 +
-* `core.worktree` must be moved from `$GIT_COMMON_DIR/config` to
- `$GIT_COMMON_DIR/config.worktree`.
-* If `core.bare` is true, then it must be moved from `$GIT_COMMON_DIR/config`
- to `$GIT_COMMON_DIR/config.worktree`.
+--
+`core.worktree`;;
+This config value must be moved from `$GIT_COMMON_DIR/config` to
+`$GIT_COMMON_DIR/config.worktree`.
+
+`core.bare`;;
+If true, then this value must be moved from
+`$GIT_COMMON_DIR/config` to `$GIT_COMMON_DIR/config.worktree`.
+--
 +
 It may also be beneficial to adjust the locations of `core.sparseCheckout`
 and `core.sparseCheckoutCone` depending on your desire for customizable

diff --git a/Documentation/config/index.txt b/Documentation/config/index.txt
@@ -30,3 +30,11 @@ index.version::
 Specify the version with which new index files should be
 initialized. This does not affect existing repositories.
 If `feature.manyFiles` is enabled, then the default is 4.
+
+index.computeHash::
+When enabled, compute the hash of the index file as it is written
+and store the hash at the end of the content. This is enabled by
+default.
++
+If you disable `index.computeHash`, then older Git clients may report that
+your index is corrupt during `git fsck`.
diff --git a/Documentation/config/refs.txt b/Documentation/config/refs.txt
@@ -0,0 +1,13 @@
+refs.packedRefsVersion::
+Specifies the file format version to use when writing a `packed-refs`
+file. Defaults to `1`.
++
+The only other value currently allowed is `2`, which uses a structured file
+format to result in a smaller `packed-refs` file. In order to write this
+file format version, the repository must also have the `packed-v2` extension
+enabled. The most typical setup will include the
+`core.repositoryFormatVersion=1` config value and the `extensions.refFormat`
+key will have three values: `files`, `packed`, and `packed-v2`.
++
+If `extensions.refFormat` has the value `packed-v2` and not `packed`, then
+`refs.packedRefsVersion` defaults to `2`.
diff --git a/Documentation/gitformat-chunk.txt b/Documentation/gitformat-chunk.txt
@@ -24,8 +24,9 @@ how they use the chunks to describe structured data.
 
 A chunk-based file format begins with some header information custom to
 that format. That header should include enough information to identify
-the file type, format version, and number of chunks in the file. From this
-information, that file can determine the start of the chunk-based region.
+the file type, format version, and (optionally) the number of chunks in
+the file. From this information, that file can determine the start of the
+chunk-based region.
 
 The chunk-based region starts with a table of contents describing where
 each chunk starts and ends. This consists of (C+1) rows of 12 bytes each,
@@ -51,8 +52,27 @@ The final entry in the table of contents must be four zero bytes. This
 confirms that the table of contents is ending and provides the offset for
 the end of the chunk-based data.
 
+The default chunk format assumes the table of contents appears at the
+beginning of the file (after the header information) and the chunks are
+ordered by increasing offset. Alternatively, the chunk format allows a
+table of contents that is placed at the end of the file (before the
+trailing hash) and the offsets are in descending order. In this trailing
+table of contents case, the data in order looks instead like the following
+table:
+
+ | Chunk ID (4 bytes) | Chunk Offset (8 bytes) |
+ |--------------------|------------------------|
+ | 0x0000 | OFFSET[C+1] |
+ | ID[C] | OFFSET[C] |
+ | ... | ... |
+ | ID[0] | OFFSET[0] |
+
+The concrete file format that uses the chunk format will mention that it
+uses a trailing table of contents if it uses it. By default, the table of
+contents is in ascending order before all chunk data.
+
 Note: The chunk-based format expects that the file contains _at least_ a
-trailing hash after `OFFSET[C+1]`.
+trailing hash after either `OFFSET[C+1]` or the trailing table of contents.
 
 Functions for working with chunk-based file formats are declared in
 `chunk-format.h`. Using these methods provide extra checks that assist

diff --git a/Makefile b/Makefile
@@ -1057,6 +1057,8 @@ LIB_OBJS += refs/debug.o
 LIB_OBJS += refs/files-backend.o
 LIB_OBJS += refs/iterator.o
 LIB_OBJS += refs/packed-backend.o
+LIB_OBJS += refs/packed-format-v1.o
+LIB_OBJS += refs/packed-format-v2.o
 LIB_OBJS += refs/ref-cache.o
 LIB_OBJS += refspec.o
 LIB_OBJS += remote.o

diff --git a/cache.h b/cache.h
@@ -1155,6 +1155,8 @@ struct repository_format {
 int hash_algo;
 int sparse_index;
 char *work_tree;
+int ref_format_count;
+enum ref_format_flags ref_format;
 struct string_list unknown_extensions;
 struct string_list v1_only_extensions;
 };

diff --git a/chunk-format.c b/chunk-format.c
@@ -13,6 +13,7 @@ struct chunk_info {
 chunk_write_fn write_fn;
 
 const void *start;
+off_t offset;
 };
 
 struct chunkfile {
@@ -56,38 +57,59 @@ void add_chunk(struct chunkfile *cf,
 cf->chunks_nr++;
 }
 
-int write_chunkfile(struct chunkfile *cf, void *data)
+int write_chunkfile(struct chunkfile *cf,
+ enum chunkfile_flags flags,
+ void *data)
 {
 int i, result = 0;
-uint64_t cur_offset = hashfile_total(cf->f);
 
 trace2_region_enter("chunkfile", "write", the_repository);
 
-/* Add the table of contents to the current offset */
-cur_offset += (cf->chunks_nr + 1) * CHUNK_TOC_ENTRY_SIZE;
+if (!(flags & CHUNKFILE_TRAILING_TOC)) {
+uint64_t cur_offset = hashfile_total(cf->f);
 
-for (i = 0; i < cf->chunks_nr; i++) {
-hashwrite_be32(cf->f, cf->chunks[i].id);
-hashwrite_be64(cf->f, cur_offset);
+/* Add the table of contents to the current offset */
+cur_offset += (cf->chunks_nr + 1) * CHUNK_TOC_ENTRY_SIZE;
 
-cur_offset += cf->chunks[i].size;
-}
+for (i = 0; i < cf->chunks_nr; i++) {
+hashwrite_be32(cf->f, cf->chunks[i].id);
+hashwrite_be64(cf->f, cur_offset);
+
+cur_offset += cf->chunks[i].size;
+}
 
-/* Trailing entry marks the end of the chunks */
-hashwrite_be32(cf->f, 0);
-hashwrite_be64(cf->f, cur_offset);
+/* Trailing entry marks the end of the chunks */
+hashwrite_be32(cf->f, 0);
+hashwrite_be64(cf->f, cur_offset);
+}
 
 for (i = 0; i < cf->chunks_nr; i++) {
-off_t start_offset = hashfile_total(cf->f);
+cf->chunks[i].offset = hashfile_total(cf->f);
 result = cf->chunks[i].write_fn(cf->f, data);
 
 if (result)
 goto cleanup;
 
-if (hashfile_total(cf->f) - start_offset != cf->chunks[i].size)
-BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead",
- cf->chunks[i].size, cf->chunks[i].id,
- hashfile_total(cf->f) - start_offset);
+if (!(flags & CHUNKFILE_TRAILING_TOC)) {
+if (hashfile_total(cf->f) - cf->chunks[i].offset != cf->chunks[i].size)
+BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead",
+ cf->chunks[i].size, cf->chunks[i].id,
+ hashfile_total(cf->f) - cf->chunks[i].offset);
+}
+
+cf->chunks[i].size = hashfile_total(cf->f) - cf->chunks[i].offset;
+}
+
+if (flags & CHUNKFILE_TRAILING_TOC) {
+size_t last_chunk_tail = hashfile_total(cf->f);
+/* First entry marks the end of the chunks */
+hashwrite_be32(cf->f, 0);
+hashwrite_be64(cf->f, last_chunk_tail);
+
+for (i = cf->chunks_nr - 1; i >= 0; i--) {
+hashwrite_be32(cf->f, cf->chunks[i].id);
+hashwrite_be64(cf->f, cf->chunks[i].offset);
+}
 }
 
 cleanup:
@@ -151,6 +173,59 @@ int read_table_of_contents(struct chunkfile *cf,
 return 0;
 }
 
+int read_trailing_table_of_contents(struct chunkfile *cf,
+ const unsigned char *mfile,
+ size_t mfile_size)
+{
+int i;
+uint32_t chunk_id;
+const unsigned char *table_of_contents = mfile + mfile_size - the_hash_algo->rawsz;
+
+while (1) {
+uint64_t chunk_offset;
+
+table_of_contents -= CHUNK_TOC_ENTRY_SIZE;
+
+chunk_id = get_be32(table_of_contents);
+chunk_offset = get_be64(table_of_contents + 4);
+
+/* Calculate the previous chunk size, if it exists. */
+if (cf->chunks_nr) {
+off_t previous_offset = cf->chunks[cf->chunks_nr - 1].offset;
+
+if (chunk_offset < previous_offset ||
+ chunk_offset > table_of_contents - mfile) {
+error(_("improper chunk offset(s) %"PRIx64" and %"PRIx64""),
+previous_offset, chunk_offset);
+return -1;
+}
+
+cf->chunks[cf->chunks_nr - 1].size = chunk_offset - previous_offset;
+}
+
+/* Stop at the null chunk. We only need it for the last size. */
+if (!chunk_id)
+break;
+
+for (i = 0; i < cf->chunks_nr; i++) {
+if (cf->chunks[i].id == chunk_id) {
+error(_("duplicate chunk ID %"PRIx32" found"),
+chunk_id);
+return -1;
+}
+}
+
+ALLOC_GROW(cf->chunks, cf->chunks_nr + 1, cf->chunks_alloc);
+
+cf->chunks[cf->chunks_nr].id = chunk_id;
+cf->chunks[cf->chunks_nr].start = mfile + chunk_offset;
+cf->chunks[cf->chunks_nr].offset = chunk_offset;
+cf->chunks_nr++;
+}
+
+return 0;
+}
+
 static int pair_chunk_fn(const unsigned char *chunk_start,
  size_t chunk_size,
  void *data)

diff --git a/chunk-format.h b/chunk-format.h
@@ -31,14 +31,30 @@ void add_chunk(struct chunkfile *cf,
  uint32_t id,
  size_t size,
  chunk_write_fn fn);
-int write_chunkfile(struct chunkfile *cf, void *data);
+
+enum chunkfile_flags {
+CHUNKFILE_TRAILING_TOC = (1 << 0),
+};
+
+int write_chunkfile(struct chunkfile *cf,
+ enum chunkfile_flags flags,
+ void *data);
 
 int read_table_of_contents(struct chunkfile *cf,
  const unsigned char *mfile,
  size_t mfile_size,
  uint64_t toc_offset,
  int toc_length);
 
+/**
+ * Read the given chunkfile, but read the table of contents from the
+ * end of the given mfile. The file is expected to be a hashfile with
+ * the_hash_algo->rawsz bytes at the end storing the hash.
+ */
+int read_trailing_table_of_contents(struct chunkfile *cf,
+ const unsigned char *mfile,
+ size_t mfile_size);
+
 #define CHUNK_NOT_FOUND (-2)
 
 /*

diff --git a/ci/run-build-and-tests.sh b/ci/run-build-and-tests.sh
@@ -30,6 +30,7 @@ linux-TEST-vars)
 export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=master
 export GIT_TEST_WRITE_REV_INDEX=1
 export GIT_TEST_CHECKOUT_WORKERS=2
+export GIT_TEST_PACKED_REFS_VERSION=2
 ;;
 linux-clang)
 export GIT_TEST_DEFAULT_HASH=sha1

diff --git a/commit-graph.c b/commit-graph.c
@@ -1932,7 +1932,7 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
 get_num_chunks(cf) * ctx->commits.nr);
 }
 
-write_chunkfile(cf, ctx);
+write_chunkfile(cf, 0, ctx);
 
 stop_progress(&ctx->progress);
 strbuf_release(&progress_title);