Skip to content

Commit 8e1454b

Browse files
Nicolas Pitre authored and Junio C Hamano committed
diff-delta: big code simplification
This is much smaller and hopefully clearer code now.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
1 parent: fe474b5 · commit: 8e1454b

File tree

1 file changed

+85
-146
lines changed

1 file changed

+85
-146
lines changed

diff-delta.c

Lines changed: 85 additions & 146 deletions
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,9 @@
1919
*/
2020

2121
#include <stdlib.h>
22+
#include <string.h>
23+
#include <zlib.h>
2224
#include "delta.h"
23-
#include "zlib.h"
2425

2526

2627
/* block size: min = 16, max = 64k, power of 2 */
@@ -29,123 +30,54 @@
2930
#define MIN(a, b) ((a) < (b) ? (a) : (b))
3031

3132
#define GR_PRIME 0x9e370001
32-
#define HASH(v, b) (((unsigned int)(v) * GR_PRIME) >> (32 - (b)))
33-
34-
static unsigned int hashbits(unsigned int size)
35-
{
36-
unsigned int val = 1, bits = 0;
37-
while (val < size && bits < 32) {
38-
val <<= 1;
39-
bits++;
40-
}
41-
return bits ? bits: 1;
42-
}
43-
44-
typedef struct s_chanode {
45-
struct s_chanode *next;
46-
int icurr;
47-
} chanode_t;
48-
49-
typedef struct s_chastore {
50-
int isize, nsize;
51-
chanode_t *ancur;
52-
} chastore_t;
53-
54-
static void cha_init(chastore_t *cha, int isize, int icount)
55-
{
56-
cha->isize = isize;
57-
cha->nsize = icount * isize;
58-
cha->ancur = NULL;
59-
}
60-
61-
static void *cha_alloc(chastore_t *cha)
62-
{
63-
chanode_t *ancur;
64-
void *data;
65-
66-
ancur = cha->ancur;
67-
if (!ancur || ancur->icurr == cha->nsize) {
68-
ancur = malloc(sizeof(chanode_t) + cha->nsize);
69-
if (!ancur)
70-
return NULL;
71-
ancur->icurr = 0;
72-
ancur->next = cha->ancur;
73-
cha->ancur = ancur;
74-
}
75-
76-
data = (void *)ancur + sizeof(chanode_t) + ancur->icurr;
77-
ancur->icurr += cha->isize;
78-
return data;
79-
}
80-
81-
static void cha_free(chastore_t *cha)
82-
{
83-
chanode_t *cur = cha->ancur;
84-
while (cur) {
85-
chanode_t *tmp = cur;
86-
cur = cur->next;
87-
free(tmp);
88-
}
89-
}
33+
#define HASH(v, shift) (((unsigned int)(v) * GR_PRIME) >> (shift))
9034

91-
typedef struct s_bdrecord {
92-
struct s_bdrecord *next;
93-
unsigned int fp;
35+
struct index {
9436
const unsigned char *ptr;
95-
} bdrecord_t;
37+
unsigned int val;
38+
struct index *next;
39+
};
9640

97-
typedef struct s_bdfile {
98-
chastore_t cha;
99-
unsigned int fphbits;
100-
bdrecord_t **fphash;
101-
} bdfile_t;
102-
103-
static int delta_prepare(const unsigned char *buf, int bufsize, bdfile_t *bdf)
41+
static struct index ** delta_index(const unsigned char *buf,
42+
unsigned long bufsize,
43+
unsigned int *hash_shift)
10444
{
105-
unsigned int fphbits;
106-
int i, hsize;
107-
const unsigned char *data, *top;
108-
bdrecord_t *brec;
109-
bdrecord_t **fphash;
110-
111-
fphbits = hashbits(bufsize / BLK_SIZE + 1);
112-
hsize = 1 << fphbits;
113-
fphash = malloc(hsize * sizeof(bdrecord_t *));
114-
if (!fphash)
115-
return -1;
116-
for (i = 0; i < hsize; i++)
117-
fphash[i] = NULL;
118-
cha_init(&bdf->cha, sizeof(bdrecord_t), hsize / 4 + 1);
119-
120-
top = buf + bufsize;
121-
data = buf + (bufsize / BLK_SIZE) * BLK_SIZE;
122-
if (data == top)
45+
unsigned int hsize, hshift, entries, blksize, i;
46+
const unsigned char *data;
47+
struct index *entry, **hash;
48+
void *mem;
49+
50+
/* determine index hash size */
51+
entries = (bufsize + BLK_SIZE - 1) / BLK_SIZE;
52+
hsize = entries / 4;
53+
for (i = 4; (1 << i) < hsize && i < 16; i++);
54+
hsize = 1 << i;
55+
hshift = 32 - i;
56+
*hash_shift = hshift;
57+
58+
/* allocate lookup index */
59+
mem = malloc(hsize * sizeof(*hash) + entries * sizeof(*entry));
60+
if (!mem)
61+
return NULL;
62+
hash = mem;
63+
entry = mem + hsize * sizeof(*hash);
64+
memset(hash, 0, hsize * sizeof(*hash));
65+
66+
/* then populate it */
67+
data = buf + entries * BLK_SIZE - BLK_SIZE;
68+
blksize = bufsize - (data - buf);
69+
while (data >= buf) {
70+
unsigned int val = adler32(0, data, blksize);
71+
i = HASH(val, hshift);
72+
entry->ptr = data;
73+
entry->val = val;
74+
entry->next = hash[i];
75+
hash[i] = entry++;
76+
blksize = BLK_SIZE;
12377
data -= BLK_SIZE;
78+
}
12479

125-
for ( ; data >= buf; data -= BLK_SIZE) {
126-
brec = cha_alloc(&bdf->cha);
127-
if (!brec) {
128-
cha_free(&bdf->cha);
129-
free(fphash);
130-
return -1;
131-
}
132-
brec->fp = adler32(0, data, MIN(BLK_SIZE, top - data));
133-
brec->ptr = data;
134-
i = HASH(brec->fp, fphbits);
135-
brec->next = fphash[i];
136-
fphash[i] = brec;
137-
}
138-
139-
bdf->fphbits = fphbits;
140-
bdf->fphash = fphash;
141-
142-
return 0;
143-
}
144-
145-
static void delta_cleanup(bdfile_t *bdf)
146-
{
147-
free(bdf->fphash);
148-
cha_free(&bdf->cha);
80+
return hash;
14981
}
15082

15183
/* provide the size of the copy opcode given the block offset and size */
@@ -161,23 +93,24 @@ void *diff_delta(void *from_buf, unsigned long from_size,
16193
unsigned long *delta_size,
16294
unsigned long max_size)
16395
{
164-
unsigned int i, outpos, outsize, inscnt, csize, msize, moff;
165-
unsigned int fp;
166-
const unsigned char *ref_data, *ref_top, *data, *top, *ptr1, *ptr2;
167-
unsigned char *out, *orig;
168-
bdrecord_t *brec;
169-
bdfile_t bdf;
96+
unsigned int i, outpos, outsize, inscnt, hash_shift;
97+
const unsigned char *ref_data, *ref_top, *data, *top;
98+
unsigned char *out;
99+
struct index *entry, **hash;
170100

171-
if (!from_size || !to_size || delta_prepare(from_buf, from_size, &bdf))
101+
if (!from_size || !to_size)
172102
return NULL;
173-
103+
hash = delta_index(from_buf, from_size, &hash_shift);
104+
if (!hash)
105+
return NULL;
106+
174107
outpos = 0;
175108
outsize = 8192;
176109
if (max_size && outsize >= max_size)
177110
outsize = max_size + MAX_OP_SIZE + 1;
178111
out = malloc(outsize);
179112
if (!out) {
180-
delta_cleanup(&bdf);
113+
free(hash);
181114
return NULL;
182115
}
183116

@@ -205,28 +138,32 @@ void *diff_delta(void *from_buf, unsigned long from_size,
205138
}
206139

207140
inscnt = 0;
208-
moff = 0;
209-
while (data < top) {
210-
msize = 0;
211-
fp = adler32(0, data, MIN(top - data, BLK_SIZE));
212-
i = HASH(fp, bdf.fphbits);
213-
for (brec = bdf.fphash[i]; brec; brec = brec->next) {
214-
if (brec->fp == fp) {
215-
csize = ref_top - brec->ptr;
216-
if (csize > top - data)
217-
csize = top - data;
218-
for (ptr1 = brec->ptr, ptr2 = data;
219-
csize && *ptr1 == *ptr2;
220-
csize--, ptr1++, ptr2++);
221141

222-
csize = ptr1 - brec->ptr;
223-
if (csize > msize) {
224-
moff = brec->ptr - ref_data;
225-
msize = csize;
226-
if (msize >= 0x10000) {
227-
msize = 0x10000;
228-
break;
229-
}
142+
while (data < top) {
143+
unsigned int moff = 0, msize = 0;
144+
unsigned int blksize = MIN(top - data, BLK_SIZE);
145+
unsigned int val = adler32(0, data, blksize);
146+
i = HASH(val, hash_shift);
147+
for (entry = hash[i]; entry; entry = entry->next) {
148+
const unsigned char *ref = entry->ptr;
149+
const unsigned char *src = data;
150+
unsigned int ref_size = ref_top - ref;
151+
if (entry->val != val)
152+
continue;
153+
if (ref_size > top - src)
154+
ref_size = top - src;
155+
while (ref_size && *src++ == *ref) {
156+
ref++;
157+
ref_size--;
158+
}
159+
ref_size = ref - entry->ptr;
160+
if (ref_size > msize) {
161+
/* this is our best match so far */
162+
moff = entry->ptr - ref_data;
163+
msize = ref_size;
164+
if (msize >= 0x10000) {
165+
msize = 0x10000;
166+
break;
230167
}
231168
}
232169
}
@@ -241,13 +178,15 @@ void *diff_delta(void *from_buf, unsigned long from_size,
241178
inscnt = 0;
242179
}
243180
} else {
181+
unsigned char *op;
182+
244183
if (inscnt) {
245184
out[outpos - inscnt - 1] = inscnt;
246185
inscnt = 0;
247186
}
248187

249188
data += msize;
250-
orig = out + outpos++;
189+
op = out + outpos++;
251190
i = 0x80;
252191

253192
if (moff & 0xff) { out[outpos++] = moff; i |= 0x01; }
@@ -262,7 +201,7 @@ void *diff_delta(void *from_buf, unsigned long from_size,
262201
msize >>= 8;
263202
if (msize & 0xff) { out[outpos++] = msize; i |= 0x20; }
264203

265-
*orig = i;
204+
*op = i;
266205
}
267206

268207
if (outpos >= outsize - MAX_OP_SIZE) {
@@ -276,7 +215,7 @@ void *diff_delta(void *from_buf, unsigned long from_size,
276215
out = realloc(out, outsize);
277216
if (!out) {
278217
free(tmp);
279-
delta_cleanup(&bdf);
218+
free(hash);
280219
return NULL;
281220
}
282221
}
@@ -285,7 +224,7 @@ void *diff_delta(void *from_buf, unsigned long from_size,
285224
if (inscnt)
286225
out[outpos - inscnt - 1] = inscnt;
287226

288-
delta_cleanup(&bdf);
227+
free(hash);
289228
*delta_size = outpos;
290229
return out;
291230
}

0 commit comments

Comments
 (0)