Skip to content

Commit 38fd072

Browse files
Nicolas Pitre authored and Junio C Hamano committed
diff-delta: allow reusing of the reference buffer index
When a reference buffer is used multiple times, its index can be computed once and then reused.

This patch adds an extra pointer-to-pointer argument (from_index) to diff_delta() for this purpose. If from_index is NULL then everything behaves as before. If from_index is non-NULL and *from_index is NULL then the index is created and its location is stored in *from_index; in this case the caller is responsible for freeing the memory pointed to by *from_index. If both from_index and *from_index are non-NULL then the index is reused as is.

This currently saves about 10% of the CPU time needed to repack the git archive.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
1 parent 5bb86b8 commit 38fd072

File tree

6 files changed

+40
-21
lines changed

6 files changed

+40
-21
lines changed

delta.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
/* handling of delta buffers */
55
extern void *diff_delta(void *from_buf, unsigned long from_size,
66
void *to_buf, unsigned long to_size,
7-
unsigned long *delta_size, unsigned long max_size);
7+
unsigned long *delta_size, unsigned long max_size,
8+
void **from_index);
89
extern void *patch_delta(void *src_buf, unsigned long src_size,
910
void *delta_buf, unsigned long delta_size,
1011
unsigned long *dst_size);

diff-delta.c

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,7 @@ struct index {
3030

3131
static struct index ** delta_index(const unsigned char *buf,
3232
unsigned long bufsize,
33-
unsigned long trg_bufsize,
34-
unsigned int *hash_shift)
33+
unsigned long trg_bufsize)
3534
{
3635
unsigned long hsize;
3736
unsigned int i, hshift, hlimit, *hash_count;
@@ -44,14 +43,17 @@ static struct index ** delta_index(const unsigned char *buf,
4443
for (i = 8; (1 << i) < hsize && i < 24; i += 2);
4544
hsize = 1 << i;
4645
hshift = (i - 8) / 2;
47-
*hash_shift = hshift;
4846

49-
/* allocate lookup index */
50-
mem = malloc(hsize * sizeof(*hash) + bufsize * sizeof(*entry));
47+
/*
48+
* Allocate lookup index. Note the first hash pointer
49+
* is used to store the hash shift value.
50+
*/
51+
mem = malloc((1 + hsize) * sizeof(*hash) + bufsize * sizeof(*entry));
5152
if (!mem)
5253
return NULL;
5354
hash = mem;
54-
entry = mem + hsize * sizeof(*hash);
55+
*hash++ = (void *)hshift;
56+
entry = mem + (1 + hsize) * sizeof(*hash);
5557
memset(hash, 0, hsize * sizeof(*hash));
5658

5759
/* allocate an array to count hash entries */
@@ -107,7 +109,7 @@ static struct index ** delta_index(const unsigned char *buf,
107109
}
108110
free(hash_count);
109111

110-
return hash;
112+
return hash-1;
111113
}
112114

113115
/* provide the size of the copy opcode given the block offset and size */
@@ -121,7 +123,8 @@ static struct index ** delta_index(const unsigned char *buf,
121123
void *diff_delta(void *from_buf, unsigned long from_size,
122124
void *to_buf, unsigned long to_size,
123125
unsigned long *delta_size,
124-
unsigned long max_size)
126+
unsigned long max_size,
127+
void **from_index)
125128
{
126129
unsigned int i, outpos, outsize, inscnt, hash_shift;
127130
const unsigned char *ref_data, *ref_top, *data, *top;
@@ -130,17 +133,25 @@ void *diff_delta(void *from_buf, unsigned long from_size,
130133

131134
if (!from_size || !to_size)
132135
return NULL;
133-
hash = delta_index(from_buf, from_size, to_size, &hash_shift);
134-
if (!hash)
135-
return NULL;
136+
if (from_index && *from_index) {
137+
hash = *from_index;
138+
} else {
139+
hash = delta_index(from_buf, from_size, to_size);
140+
if (!hash)
141+
return NULL;
142+
if (from_index)
143+
*from_index = hash;
144+
}
145+
hash_shift = (unsigned int)(*hash++);
136146

137147
outpos = 0;
138148
outsize = 8192;
139149
if (max_size && outsize >= max_size)
140150
outsize = max_size + MAX_OP_SIZE + 1;
141151
out = malloc(outsize);
142152
if (!out) {
143-
free(hash);
153+
if (!from_index)
154+
free(hash-1);
144155
return NULL;
145156
}
146157

@@ -241,7 +252,8 @@ void *diff_delta(void *from_buf, unsigned long from_size,
241252
out = realloc(out, outsize);
242253
if (!out) {
243254
free(tmp);
244-
free(hash);
255+
if (!from_index)
256+
free(hash-1);
245257
return NULL;
246258
}
247259
}
@@ -250,7 +262,8 @@ void *diff_delta(void *from_buf, unsigned long from_size,
250262
if (inscnt)
251263
out[outpos - inscnt - 1] = inscnt;
252264

253-
free(hash);
265+
if (!from_index)
266+
free(hash-1);
254267
*delta_size = outpos;
255268
return out;
256269
}

diffcore-break.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ static int should_break(struct diff_filespec *src,
7171

7272
delta = diff_delta(src->data, src->size,
7373
dst->data, dst->size,
74-
&delta_size, 0);
74+
&delta_size, 0, NULL);
7575
if (!delta)
7676
return 0; /* error but caught downstream */
7777

diffcore-rename.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ static int estimate_similarity(struct diff_filespec *src,
168168
delta_limit = base_size * (MAX_SCORE-minimum_score) / MAX_SCORE;
169169
delta = diff_delta(src->data, src->size,
170170
dst->data, dst->size,
171-
&delta_size, delta_limit);
171+
&delta_size, delta_limit, NULL);
172172
if (!delta)
173173
/* If delta_limit is exceeded, we have too much differences */
174174
return 0;

pack-objects.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ static void *delta_against(void *buf, unsigned long size, struct object_entry *e
204204
if (!otherbuf)
205205
die("unable to read %s", sha1_to_hex(entry->delta->sha1));
206206
delta_buf = diff_delta(otherbuf, othersize,
207-
buf, size, &delta_size, 0);
207+
buf, size, &delta_size, 0, NULL);
208208
if (!delta_buf || delta_size != entry->delta_size)
209209
die("delta size changed");
210210
free(buf);
@@ -810,6 +810,7 @@ static int type_size_sort(const struct object_entry *a, const struct object_entr
810810
struct unpacked {
811811
struct object_entry *entry;
812812
void *data;
813+
void **delta_index;
813814
};
814815

815816
/*
@@ -891,7 +892,8 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de
891892
if (sizediff >= max_size)
892893
return -1;
893894
delta_buf = diff_delta(old->data, oldsize,
894-
cur->data, size, &delta_size, max_size);
895+
cur->data, size, &delta_size,
896+
max_size, old->delta_index);
895897
if (!delta_buf)
896898
return 0;
897899
cur_entry->delta = old_entry;
@@ -948,6 +950,7 @@ static void find_deltas(struct object_entry **list, int window, int depth)
948950
*/
949951
continue;
950952

953+
free(n->delta_index);
951954
free(n->data);
952955
n->entry = entry;
953956
n->data = read_sha1_file(entry->sha1, type, &size);
@@ -974,8 +977,10 @@ static void find_deltas(struct object_entry **list, int window, int depth)
974977
if (progress)
975978
fputc('\n', stderr);
976979

977-
for (i = 0; i < window; ++i)
980+
for (i = 0; i < window; ++i) {
981+
free(array[i].delta_index);
978982
free(array[i].data);
983+
}
979984
free(array);
980985
}
981986

test-delta.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ int main(int argc, char *argv[])
6363
if (argv[1][1] == 'd')
6464
out_buf = diff_delta(from_buf, from_size,
6565
data_buf, data_size,
66-
&out_size, 0);
66+
&out_size, 0, NULL);
6767
else
6868
out_buf = patch_delta(from_buf, from_size,
6969
data_buf, data_size,

0 commit comments

Comments
 (0)