Skip to content

Commit 3b1d9e0

Browse files
benpeart authored and gitster committed
eoie: add End of Index Entry (EOIE) extension
The End of Index Entry (EOIE) is used to locate the end of the variable length index entries and the beginning of the extensions. Code can take advantage of this to quickly locate the index extensions without having to parse through all of the index entries. The EOIE extension is always written out to the index file including to the shared index when using the split index feature. Because it is always written out, the SHA checksums in t/t1700-split-index.sh were updated to reflect its inclusion. It is written as an optional extension to ensure compatibility with other git implementations that do not yet support it. It is always written out to ensure it is available as often as possible to speed up index operations. Because it must be able to be loaded before the variable length cache entries and other index extensions, this extension must be written last. The signature for this extension is { 'E', 'O', 'I', 'E' }. The extension consists of: - 32-bit offset to the end of the index entries - 160-bit SHA-1 over the extension types and their sizes (but not their contents). E.g. if we have "TREE" extension that is N-bytes long, "REUC" extension that is M-bytes long, followed by "EOIE", then the hash would be: SHA-1("TREE" + <binary representation of N> + "REUC" + <binary representation of M>) Signed-off-by: Ben Peart <benpeart@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 371ed0d commit 3b1d9e0

File tree

3 files changed

+177
-12
lines changed

3 files changed

+177
-12
lines changed

Documentation/technical/index-format.txt

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,3 +314,26 @@ The remaining data of each directory block is grouped by type:
314314

315315
- An ewah bitmap, the n-th bit indicates whether the n-th index entry
316316
is not CE_FSMONITOR_VALID.
317+
318+
== End of Index Entry
319+
320+
The End of Index Entry (EOIE) is used to locate the end of the variable
321+
length index entries and the beginning of the extensions. Code can take
322+
advantage of this to quickly locate the index extensions without having
323+
to parse through all of the index entries.
324+
325+
Because it must be able to be loaded before the variable length cache
326+
entries and other index extensions, this extension must be written last.
327+
The signature for this extension is { 'E', 'O', 'I', 'E' }.
328+
329+
The extension consists of:
330+
331+
- 32-bit offset to the end of the index entries
332+
333+
- 160-bit SHA-1 over the extension types and their sizes (but not
334+
their contents). E.g. if we have "TREE" extension that is N-bytes
335+
long, "REUC" extension that is M-bytes long, followed by "EOIE",
336+
then the hash would be:
337+
338+
SHA-1("TREE" + <binary representation of N> +
339+
"REUC" + <binary representation of M>)

read-cache.c

Lines changed: 150 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#define CACHE_EXT_LINK 0x6c696e6b /* "link" */
4444
#define CACHE_EXT_UNTRACKED 0x554E5452 /* "UNTR" */
4545
#define CACHE_EXT_FSMONITOR 0x46534D4E /* "FSMN" */
46+
#define CACHE_EXT_ENDOFINDEXENTRIES 0x454F4945 /* "EOIE" */
4647

4748
/* changes that can be kept in $GIT_DIR/index (basically all extensions) */
4849
#define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
@@ -1693,6 +1694,9 @@ static int read_index_extension(struct index_state *istate,
16931694
case CACHE_EXT_FSMONITOR:
16941695
read_fsmonitor_extension(istate, data, sz);
16951696
break;
1697+
case CACHE_EXT_ENDOFINDEXENTRIES:
1698+
/* already handled in do_read_index() */
1699+
break;
16961700
default:
16971701
if (*ext < 'A' || 'Z' < *ext)
16981702
return error("index uses %.4s extension, which we do not understand",
@@ -1883,6 +1887,9 @@ static size_t estimate_cache_size(size_t ondisk_size, unsigned int entries)
18831887
return ondisk_size + entries * per_entry;
18841888
}
18851889

1890+
static size_t read_eoie_extension(const char *mmap, size_t mmap_size);
1891+
static void write_eoie_extension(struct strbuf *sb, git_hash_ctx *eoie_context, size_t offset);
1892+
18861893
/* remember to discard_cache() before reading a different cache! */
18871894
int do_read_index(struct index_state *istate, const char *path, int must_exist)
18881895
{
@@ -2190,11 +2197,15 @@ static int ce_write(git_hash_ctx *context, int fd, void *data, unsigned int len)
21902197
return 0;
21912198
}
21922199

2193-
static int write_index_ext_header(git_hash_ctx *context, int fd,
2194-
unsigned int ext, unsigned int sz)
2200+
static int write_index_ext_header(git_hash_ctx *context, git_hash_ctx *eoie_context,
2201+
int fd, unsigned int ext, unsigned int sz)
21952202
{
21962203
ext = htonl(ext);
21972204
sz = htonl(sz);
2205+
if (eoie_context) {
2206+
the_hash_algo->update_fn(eoie_context, &ext, 4);
2207+
the_hash_algo->update_fn(eoie_context, &sz, 4);
2208+
}
21982209
return ((ce_write(context, fd, &ext, 4) < 0) ||
21992210
(ce_write(context, fd, &sz, 4) < 0)) ? -1 : 0;
22002211
}
@@ -2437,7 +2448,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
24372448
{
24382449
uint64_t start = getnanotime();
24392450
int newfd = tempfile->fd;
2440-
git_hash_ctx c;
2451+
git_hash_ctx c, eoie_c;
24412452
struct cache_header hdr;
24422453
int i, err = 0, removed, extended, hdr_version;
24432454
struct cache_entry **cache = istate->cache;
@@ -2446,6 +2457,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
24462457
struct ondisk_cache_entry_extended ondisk;
24472458
struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
24482459
int drop_cache_tree = istate->drop_cache_tree;
2460+
off_t offset;
24492461

24502462
for (i = removed = extended = 0; i < entries; i++) {
24512463
if (cache[i]->ce_flags & CE_REMOVE)
@@ -2479,6 +2491,10 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
24792491
if (ce_write(&c, newfd, &hdr, sizeof(hdr)) < 0)
24802492
return -1;
24812493

2494+
offset = lseek(newfd, 0, SEEK_CUR);
2495+
if (offset < 0)
2496+
return -1;
2497+
offset += write_buffer_len;
24822498
previous_name = (hdr_version == 4) ? &previous_name_buf : NULL;
24832499

24842500
for (i = 0; i < entries; i++) {
@@ -2512,11 +2528,17 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
25122528
return err;
25132529

25142530
/* Write extension data here */
2531+
offset = lseek(newfd, 0, SEEK_CUR);
2532+
if (offset < 0)
2533+
return -1;
2534+
offset += write_buffer_len;
2535+
the_hash_algo->init_fn(&eoie_c);
2536+
25152537
if (!strip_extensions && istate->split_index) {
25162538
struct strbuf sb = STRBUF_INIT;
25172539

25182540
err = write_link_extension(&sb, istate) < 0 ||
2519-
write_index_ext_header(&c, newfd, CACHE_EXT_LINK,
2541+
write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_LINK,
25202542
sb.len) < 0 ||
25212543
ce_write(&c, newfd, sb.buf, sb.len) < 0;
25222544
strbuf_release(&sb);
@@ -2527,7 +2549,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
25272549
struct strbuf sb = STRBUF_INIT;
25282550

25292551
cache_tree_write(&sb, istate->cache_tree);
2530-
err = write_index_ext_header(&c, newfd, CACHE_EXT_TREE, sb.len) < 0
2552+
err = write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_TREE, sb.len) < 0
25312553
|| ce_write(&c, newfd, sb.buf, sb.len) < 0;
25322554
strbuf_release(&sb);
25332555
if (err)
@@ -2537,7 +2559,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
25372559
struct strbuf sb = STRBUF_INIT;
25382560

25392561
resolve_undo_write(&sb, istate->resolve_undo);
2540-
err = write_index_ext_header(&c, newfd, CACHE_EXT_RESOLVE_UNDO,
2562+
err = write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_RESOLVE_UNDO,
25412563
sb.len) < 0
25422564
|| ce_write(&c, newfd, sb.buf, sb.len) < 0;
25432565
strbuf_release(&sb);
@@ -2548,7 +2570,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
25482570
struct strbuf sb = STRBUF_INIT;
25492571

25502572
write_untracked_extension(&sb, istate->untracked);
2551-
err = write_index_ext_header(&c, newfd, CACHE_EXT_UNTRACKED,
2573+
err = write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_UNTRACKED,
25522574
sb.len) < 0 ||
25532575
ce_write(&c, newfd, sb.buf, sb.len) < 0;
25542576
strbuf_release(&sb);
@@ -2559,7 +2581,24 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
25592581
struct strbuf sb = STRBUF_INIT;
25602582

25612583
write_fsmonitor_extension(&sb, istate);
2562-
err = write_index_ext_header(&c, newfd, CACHE_EXT_FSMONITOR, sb.len) < 0
2584+
err = write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_FSMONITOR, sb.len) < 0
2585+
|| ce_write(&c, newfd, sb.buf, sb.len) < 0;
2586+
strbuf_release(&sb);
2587+
if (err)
2588+
return -1;
2589+
}
2590+
2591+
/*
2592+
* CACHE_EXT_ENDOFINDEXENTRIES must be written as the last entry before the SHA1
2593+
* so that it can be found and processed before all the index entries are
2594+
* read. Write it out regardless of the strip_extensions parameter as we need it
2595+
* when loading the shared index.
2596+
*/
2597+
if (offset) {
2598+
struct strbuf sb = STRBUF_INIT;
2599+
2600+
write_eoie_extension(&sb, &eoie_c, offset);
2601+
err = write_index_ext_header(&c, NULL, newfd, CACHE_EXT_ENDOFINDEXENTRIES, sb.len) < 0
25632602
|| ce_write(&c, newfd, sb.buf, sb.len) < 0;
25642603
strbuf_release(&sb);
25652604
if (err)
@@ -2975,3 +3014,106 @@ int should_validate_cache_entries(void)
29753014

29763015
return validate_index_cache_entries;
29773016
}
3017+
3018+
#define EOIE_SIZE (4 + GIT_SHA1_RAWSZ) /* <4-byte offset> + <20-byte hash> */
3019+
#define EOIE_SIZE_WITH_HEADER (4 + 4 + EOIE_SIZE) /* <4-byte signature> + <4-byte length> + EOIE_SIZE */
3020+
3021+
static size_t read_eoie_extension(const char *mmap, size_t mmap_size)
3022+
{
3023+
/*
3024+
* The end of index entries (EOIE) extension is guaranteed to be last
3025+
* so that it can be found by scanning backwards from the EOF.
3026+
*
3027+
* "EOIE"
3028+
* <4-byte length>
3029+
* <4-byte offset>
3030+
* <20-byte hash>
3031+
*/
3032+
const char *index, *eoie;
3033+
uint32_t extsize;
3034+
size_t offset, src_offset;
3035+
unsigned char hash[GIT_MAX_RAWSZ];
3036+
git_hash_ctx c;
3037+
3038+
/* ensure we have an index big enough to contain an EOIE extension */
3039+
if (mmap_size < sizeof(struct cache_header) + EOIE_SIZE_WITH_HEADER + the_hash_algo->rawsz)
3040+
return 0;
3041+
3042+
/* validate the extension signature */
3043+
index = eoie = mmap + mmap_size - EOIE_SIZE_WITH_HEADER - the_hash_algo->rawsz;
3044+
if (CACHE_EXT(index) != CACHE_EXT_ENDOFINDEXENTRIES)
3045+
return 0;
3046+
index += sizeof(uint32_t);
3047+
3048+
/* validate the extension size */
3049+
extsize = get_be32(index);
3050+
if (extsize != EOIE_SIZE)
3051+
return 0;
3052+
index += sizeof(uint32_t);
3053+
3054+
/*
3055+
* Validate the offset we're going to look for the first extension
3056+
* signature is after the index header and before the eoie extension.
3057+
*/
3058+
offset = get_be32(index);
3059+
if (mmap + offset < mmap + sizeof(struct cache_header))
3060+
return 0;
3061+
if (mmap + offset >= eoie)
3062+
return 0;
3063+
index += sizeof(uint32_t);
3064+
3065+
/*
3066+
* The hash is computed over extension types and their sizes (but not
3067+
* their contents). E.g. if we have "TREE" extension that is N-bytes
3068+
* long, "REUC" extension that is M-bytes long, followed by "EOIE",
3069+
* then the hash would be:
3070+
*
3071+
* SHA-1("TREE" + <binary representation of N> +
3072+
* "REUC" + <binary representation of M>)
3073+
*/
3074+
src_offset = offset;
3075+
the_hash_algo->init_fn(&c);
3076+
while (src_offset < mmap_size - the_hash_algo->rawsz - EOIE_SIZE_WITH_HEADER) {
3077+
/* After an array of active_nr index entries,
3078+
* there can be arbitrary number of extended
3079+
* sections, each of which is prefixed with
3080+
* extension name (4-byte) and section length
3081+
* in 4-byte network byte order.
3082+
*/
3083+
uint32_t extsize;
3084+
memcpy(&extsize, mmap + src_offset + 4, 4);
3085+
extsize = ntohl(extsize);
3086+
3087+
/* verify the extension size isn't so large it will wrap around */
3088+
if (src_offset + 8 + extsize < src_offset)
3089+
return 0;
3090+
3091+
the_hash_algo->update_fn(&c, mmap + src_offset, 8);
3092+
3093+
src_offset += 8;
3094+
src_offset += extsize;
3095+
}
3096+
the_hash_algo->final_fn(hash, &c);
3097+
if (!hasheq(hash, (const unsigned char *)index))
3098+
return 0;
3099+
3100+
/* Validate that the extension offsets returned us back to the eoie extension. */
3101+
if (src_offset != mmap_size - the_hash_algo->rawsz - EOIE_SIZE_WITH_HEADER)
3102+
return 0;
3103+
3104+
return offset;
3105+
}
3106+
3107+
/*
 * Append the payload of the EOIE extension to sb: first the 4-byte offset
 * (stored with put_be32), then the_hash_algo->rawsz bytes of the hash
 * obtained by finalizing eoie_context.
 *
 * NOTE(review): offset is a size_t but only sizeof(uint32_t) bytes of it
 * are stored.
 */
static void write_eoie_extension(struct strbuf *sb, git_hash_ctx *eoie_context, size_t offset)
{
	unsigned char digest[GIT_MAX_RAWSZ];
	uint32_t be_offset;

	/* finalize the hash accumulated in eoie_context */
	the_hash_algo->final_fn(digest, eoie_context);

	/* payload layout: <offset> followed by <hash> */
	put_be32(&be_offset, offset);
	strbuf_add(sb, &be_offset, sizeof(be_offset));
	strbuf_add(sb, digest, the_hash_algo->rawsz);
}

t/t1700-split-index.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,11 @@ test_expect_success 'enable split index' '
1515
indexversion=$(test-tool index-version <.git/index) &&
1616
if test "$indexversion" = "4"
1717
then
18-
own=432ef4b63f32193984f339431fd50ca796493569
19-
base=508851a7f0dfa8691e9f69c7f055865389012491
18+
own=3527df833c6c100d3d1d921a9a782d62a8be4b58
19+
base=746f7ab2ed44fb839efdfbffcf399d0b113fb4cb
2020
else
21-
own=8299b0bcd1ac364e5f1d7768efb62fa2da79a339
22-
base=39d890139ee5356c7ef572216cebcd27aa41f9df
21+
own=5e9b60117ece18da410ddecc8b8d43766a0e4204
22+
base=4370042739b31cd17a5c5cd6043a77c9a00df113
2323
fi &&
2424
cat >expect <<-EOF &&
2525
own $own

0 commit comments

Comments
 (0)