Skip to content

Commit eb32d23

Browse files
Nicolas Pitre authored and Junio C Hamano committed
introduce delta objects with offset to base
This adds a new object, namely OBJ_OFS_DELTA, renames OBJ_DELTA to OBJ_REF_DELTA to better make the distinction between those two delta objects, and adds support for the handling of those new delta objects in sha1_file.c only. The OBJ_OFS_DELTA contains a relative offset from the delta object's position in a pack instead of the 20-byte SHA1 reference to identify the base object. Since the base is likely not to be far away, the relative offset is more likely to have a smaller encoding on average than an absolute offset. And for those delta objects the base must always be stored first because there is no way to know the distance of later objects when streaming a pack. Hence this relative offset is always meant to be negative. The offset encoding is slightly denser than the one used for object size -- credits to <linux@horizon.com> (whoever this is) for bringing it to my attention. This allows for pack size reduction from 3.2% (Linux-2.6) to over 5% (linux-historic). Runtime pack access should be faster too since delta replay does skip a search in the pack index for each delta in a chain. Signed-off-by: Nicolas Pitre <nico@cam.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
1 parent 4a0641b commit eb32d23

File tree

5 files changed

+82
-45
lines changed

5 files changed

+82
-45
lines changed

builtin-pack-objects.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha
232232
int n = 1;
233233
unsigned char c;
234234

235-
if (type < OBJ_COMMIT || type > OBJ_DELTA)
235+
if (type < OBJ_COMMIT || type > OBJ_REF_DELTA)
236236
die("bad type %d", type);
237237

238238
c = (type << 4) | (size & 15);
@@ -297,7 +297,7 @@ static int revalidate_pack_entry(struct object_entry *entry, unsigned char *data
297297
used = unpack_object_header_gently(data, len, &type, &size);
298298
if (!used)
299299
return -1;
300-
if (type == OBJ_DELTA)
300+
if (type == OBJ_REF_DELTA)
301301
used += 20; /* skip base object name */
302302
data += used;
303303
len -= used;
@@ -340,7 +340,7 @@ static unsigned long write_object(struct sha1file *f,
340340
obj_type = entry->type;
341341
if (! entry->in_pack)
342342
to_reuse = 0; /* can't reuse what we don't have */
343-
else if (obj_type == OBJ_DELTA)
343+
else if (obj_type == OBJ_REF_DELTA)
344344
to_reuse = 1; /* check_object() decided it for us */
345345
else if (obj_type != entry->in_pack_type)
346346
to_reuse = 0; /* pack has delta which is unusable */
@@ -380,7 +380,7 @@ static unsigned long write_object(struct sha1file *f,
380380
if (entry->delta) {
381381
buf = delta_against(buf, size, entry);
382382
size = entry->delta_size;
383-
obj_type = OBJ_DELTA;
383+
obj_type = OBJ_REF_DELTA;
384384
}
385385
/*
386386
* The object header is a byte of 'type' followed by zero or
@@ -409,11 +409,11 @@ static unsigned long write_object(struct sha1file *f,
409409
sha1write(f, buf, datalen);
410410
unuse_packed_git(p);
411411
hdrlen = 0; /* not really */
412-
if (obj_type == OBJ_DELTA)
412+
if (obj_type == OBJ_REF_DELTA)
413413
reused_delta++;
414414
reused++;
415415
}
416-
if (obj_type == OBJ_DELTA)
416+
if (obj_type == OBJ_REF_DELTA)
417417
written_delta++;
418418
written++;
419419
return hdrlen + datalen;
@@ -916,7 +916,7 @@ static void check_object(struct object_entry *entry)
916916
* delta.
917917
*/
918918
if (!no_reuse_delta &&
919-
entry->in_pack_type == OBJ_DELTA &&
919+
entry->in_pack_type == OBJ_REF_DELTA &&
920920
(base_entry = locate_object_entry(base)) &&
921921
(!base_entry->preferred_base)) {
922922

@@ -929,7 +929,7 @@ static void check_object(struct object_entry *entry)
929929
/* uncompressed size of the delta data */
930930
entry->size = entry->delta_size = size;
931931
entry->delta = base_entry;
932-
entry->type = OBJ_DELTA;
932+
entry->type = OBJ_REF_DELTA;
933933

934934
entry->delta_sibling = base_entry->delta_child;
935935
base_entry->delta_child = entry;

builtin-unpack-objects.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ static void unpack_one(unsigned nr, unsigned total)
241241
case OBJ_TAG:
242242
unpack_non_delta_entry(type, size);
243243
return;
244-
case OBJ_DELTA:
244+
case OBJ_REF_DELTA:
245245
unpack_delta_entry(size);
246246
return;
247247
default:

cache.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -274,8 +274,9 @@ enum object_type {
274274
OBJ_TREE = 2,
275275
OBJ_BLOB = 3,
276276
OBJ_TAG = 4,
277-
/* 5/6 for future expansion */
278-
OBJ_DELTA = 7,
277+
/* 5 for future expansion */
278+
OBJ_OFS_DELTA = 6,
279+
OBJ_REF_DELTA = 7,
279280
OBJ_BAD,
280281
};
281282

index-pack.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ static void *unpack_raw_entry(unsigned long offset,
158158
}
159159

160160
switch (type) {
161-
case OBJ_DELTA:
161+
case OBJ_REF_DELTA:
162162
if (pos + 20 >= pack_limit)
163163
bad_object(offset, "object extends past end of pack");
164164
hashcpy(delta_base, pack_base + pos);
@@ -301,7 +301,7 @@ static void parse_pack_objects(void)
301301
data = unpack_raw_entry(offset, &obj->type, &data_size,
302302
base_sha1, &offset);
303303
obj->real_type = obj->type;
304-
if (obj->type == OBJ_DELTA) {
304+
if (obj->type == OBJ_REF_DELTA) {
305305
struct delta_entry *delta = &deltas[nr_deltas++];
306306
delta->obj = obj;
307307
hashcpy(delta->base_sha1, base_sha1);
@@ -328,7 +328,7 @@ static void parse_pack_objects(void)
328328
struct object_entry *obj = &objects[i];
329329
int j, first, last;
330330

331-
if (obj->type == OBJ_DELTA)
331+
if (obj->type == OBJ_REF_DELTA)
332332
continue;
333333
if (find_deltas_based_on_sha1(obj->sha1, &first, &last))
334334
continue;
@@ -341,7 +341,7 @@ static void parse_pack_objects(void)
341341

342342
/* Check for unresolved deltas */
343343
for (i = 0; i < nr_deltas; i++) {
344-
if (deltas[i].obj->real_type == OBJ_DELTA)
344+
if (deltas[i].obj->real_type == OBJ_REF_DELTA)
345345
die("packfile '%s' has unresolved deltas", pack_name);
346346
}
347347
}

sha1_file.c

Lines changed: 66 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -883,26 +883,61 @@ void * unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned l
883883
return unpack_sha1_rest(&stream, hdr, *size);
884884
}
885885

886+
static unsigned long get_delta_base(struct packed_git *p,
887+
unsigned long offset,
888+
enum object_type kind,
889+
unsigned long delta_obj_offset,
890+
unsigned long *base_obj_offset)
891+
{
892+
unsigned char *base_info = (unsigned char *) p->pack_base + offset;
893+
unsigned long base_offset;
894+
895+
/* there must be at least 20 bytes left regardless of delta type */
896+
if (p->pack_size <= offset + 20)
897+
die("truncated pack file");
898+
899+
if (kind == OBJ_OFS_DELTA) {
900+
unsigned used = 0;
901+
unsigned char c = base_info[used++];
902+
base_offset = c & 127;
903+
while (c & 128) {
904+
base_offset += 1;
905+
if (!base_offset || base_offset & ~(~0UL >> 7))
906+
die("offset value overflow for delta base object");
907+
c = base_info[used++];
908+
base_offset = (base_offset << 7) + (c & 127);
909+
}
910+
base_offset = delta_obj_offset - base_offset;
911+
if (base_offset >= delta_obj_offset)
912+
die("delta base offset out of bound");
913+
offset += used;
914+
} else if (kind == OBJ_REF_DELTA) {
915+
/* The base entry _must_ be in the same pack */
916+
base_offset = find_pack_entry_one(base_info, p);
917+
if (!base_offset)
918+
die("failed to find delta-pack base object %s",
919+
sha1_to_hex(base_info));
920+
offset += 20;
921+
} else
922+
die("I am totally screwed");
923+
*base_obj_offset = base_offset;
924+
return offset;
925+
}
926+
886927
/* forward declaration for a mutually recursive function */
887928
static int packed_object_info(struct packed_git *p, unsigned long offset,
888929
char *type, unsigned long *sizep);
889930

890931
static int packed_delta_info(struct packed_git *p,
891932
unsigned long offset,
933+
enum object_type kind,
934+
unsigned long obj_offset,
892935
char *type,
893936
unsigned long *sizep)
894937
{
895938
unsigned long base_offset;
896-
unsigned char *base_sha1 = (unsigned char *) p->pack_base + offset;
897939

898-
if (p->pack_size < offset + 20)
899-
die("truncated pack file");
900-
/* The base entry _must_ be in the same pack */
901-
base_offset = find_pack_entry_one(base_sha1, p);
902-
if (!base_offset)
903-
die("failed to find delta-pack base object %s",
904-
sha1_to_hex(base_sha1));
905-
offset += 20;
940+
offset = get_delta_base(p, offset, kind, obj_offset, &base_offset);
906941

907942
/* We choose to only get the type of the base object and
908943
* ignore potentially corrupt pack file that expects the delta
@@ -975,7 +1010,7 @@ int check_reuse_pack_delta(struct packed_git *p, unsigned long offset,
9751010
use_packed_git(p);
9761011
ptr = offset;
9771012
ptr = unpack_object_header(p, ptr, kindp, sizep);
978-
if (*kindp != OBJ_DELTA)
1013+
if (*kindp != OBJ_REF_DELTA)
9791014
goto done;
9801015
hashcpy(base, (unsigned char *) p->pack_base + ptr);
9811016
status = 0;
@@ -992,11 +1027,12 @@ void packed_object_info_detail(struct packed_git *p,
9921027
unsigned int *delta_chain_length,
9931028
unsigned char *base_sha1)
9941029
{
995-
unsigned long val;
1030+
unsigned long obj_offset, val;
9961031
unsigned char *next_sha1;
9971032
enum object_type kind;
9981033

9991034
*delta_chain_length = 0;
1035+
obj_offset = offset;
10001036
offset = unpack_object_header(p, offset, &kind, size);
10011037

10021038
for (;;) {
@@ -1011,7 +1047,13 @@ void packed_object_info_detail(struct packed_git *p,
10111047
strcpy(type, type_names[kind]);
10121048
*store_size = 0; /* notyet */
10131049
return;
1014-
case OBJ_DELTA:
1050+
case OBJ_OFS_DELTA:
1051+
get_delta_base(p, offset, kind, obj_offset, &offset);
1052+
if (*delta_chain_length == 0) {
1053+
/* TODO: find base_sha1 as pointed by offset */
1054+
}
1055+
break;
1056+
case OBJ_REF_DELTA:
10151057
if (p->pack_size <= offset + 20)
10161058
die("pack file %s records an incomplete delta base",
10171059
p->pack_name);
@@ -1021,6 +1063,7 @@ void packed_object_info_detail(struct packed_git *p,
10211063
offset = find_pack_entry_one(next_sha1, p);
10221064
break;
10231065
}
1066+
obj_offset = offset;
10241067
offset = unpack_object_header(p, offset, &kind, &val);
10251068
(*delta_chain_length)++;
10261069
}
@@ -1029,15 +1072,15 @@ void packed_object_info_detail(struct packed_git *p,
10291072
static int packed_object_info(struct packed_git *p, unsigned long offset,
10301073
char *type, unsigned long *sizep)
10311074
{
1032-
unsigned long size;
1075+
unsigned long size, obj_offset = offset;
10331076
enum object_type kind;
10341077

10351078
offset = unpack_object_header(p, offset, &kind, &size);
10361079

1037-
if (kind == OBJ_DELTA)
1038-
return packed_delta_info(p, offset, type, sizep);
1039-
10401080
switch (kind) {
1081+
case OBJ_OFS_DELTA:
1082+
case OBJ_REF_DELTA:
1083+
return packed_delta_info(p, offset, kind, obj_offset, type, sizep);
10411084
case OBJ_COMMIT:
10421085
case OBJ_TREE:
10431086
case OBJ_BLOB:
@@ -1083,23 +1126,15 @@ static void *unpack_compressed_entry(struct packed_git *p,
10831126
static void *unpack_delta_entry(struct packed_git *p,
10841127
unsigned long offset,
10851128
unsigned long delta_size,
1129+
enum object_type kind,
1130+
unsigned long obj_offset,
10861131
char *type,
10871132
unsigned long *sizep)
10881133
{
10891134
void *delta_data, *result, *base;
10901135
unsigned long result_size, base_size, base_offset;
1091-
unsigned char *base_sha1;
1092-
1093-
if (p->pack_size < offset + 20)
1094-
die("truncated pack file");
1095-
/* The base entry _must_ be in the same pack */
1096-
base_sha1 = (unsigned char*)p->pack_base + offset;
1097-
base_offset = find_pack_entry_one(base_sha1, p);
1098-
if (!base_offset)
1099-
die("failed to find delta-pack base object %s",
1100-
sha1_to_hex(base_sha1));
1101-
offset += 20;
11021136

1137+
offset = get_delta_base(p, offset, kind, obj_offset, &base_offset);
11031138
base = unpack_entry_gently(p, base_offset, type, &base_size);
11041139
if (!base)
11051140
die("failed to read delta base object at %lu from %s",
@@ -1136,13 +1171,14 @@ static void *unpack_entry(struct pack_entry *entry,
11361171
void *unpack_entry_gently(struct packed_git *p, unsigned long offset,
11371172
char *type, unsigned long *sizep)
11381173
{
1139-
unsigned long size;
1174+
unsigned long size, obj_offset = offset;
11401175
enum object_type kind;
11411176

11421177
offset = unpack_object_header(p, offset, &kind, &size);
11431178
switch (kind) {
1144-
case OBJ_DELTA:
1145-
return unpack_delta_entry(p, offset, size, type, sizep);
1179+
case OBJ_OFS_DELTA:
1180+
case OBJ_REF_DELTA:
1181+
return unpack_delta_entry(p, offset, size, kind, obj_offset, type, sizep);
11461182
case OBJ_COMMIT:
11471183
case OBJ_TREE:
11481184
case OBJ_BLOB:

0 commit comments

Comments
 (0)