1919 */
2020
2121#include <stdlib.h>
22+ #include <string.h>
23+ #include <zlib.h>
2224#include "delta.h"
23- #include "zlib.h"
2425
2526
2627/* block size: min = 16, max = 64k, power of 2 */
2930#define MIN (a , b ) ((a) < (b) ? (a) : (b))
3031
3132#define GR_PRIME 0x9e370001
32- #define HASH (v , b ) (((unsigned int)(v) * GR_PRIME) >> (32 - (b)))
33-
/*
 * Return the number of hash bits to use for a table meant to hold `size`
 * slots: the smallest bit count b such that 2^b >= size, limited to 32
 * and never less than 1.
 */
static unsigned int hashbits(unsigned int size)
{
	unsigned int bits;
	unsigned int span = 1;

	/* double the span until it covers `size` or we run out of bits */
	for (bits = 0; span < size && bits < 32; bits++)
		span <<= 1;

	if (bits == 0)
		return 1;
	return bits;
}
43-
/*
 * Chunked fixed-size allocator.
 *
 * Items of `isize` bytes are carved sequentially out of malloc'ed chunks
 * that each carry `nsize` bytes of payload right after their header.
 * Chunks are chained through `next` so that cha_free() can release all of
 * them in a single pass; individual items are never freed on their own.
 */
typedef struct s_chanode {
	struct s_chanode *next;	/* previously filled chunk, or NULL */
	int icurr;		/* payload bytes already handed out */
} chanode_t;

typedef struct s_chastore {
	int isize, nsize;	/* item size / payload bytes per chunk */
	chanode_t *ancur;	/* chunk currently being filled (list head) */
} chastore_t;

/* Set up an empty store handing out `isize`-byte items, `icount` per chunk. */
static void cha_init(chastore_t *cha, int isize, int icount)
{
	cha->isize = isize;
	cha->nsize = icount * isize;
	cha->ancur = NULL;
}

/*
 * Return uninitialized storage for one item, or NULL when a new chunk was
 * needed and malloc() failed (the store is left unchanged in that case).
 */
static void *cha_alloc(chastore_t *cha)
{
	chanode_t *ancur = cha->ancur;
	unsigned char *data;

	/* open a new chunk when there is none yet or the current one is full */
	if (!ancur || ancur->icurr == cha->nsize) {
		ancur = malloc(sizeof(*ancur) + cha->nsize);
		if (!ancur)
			return NULL;
		ancur->icurr = 0;
		ancur->next = cha->ancur;
		cha->ancur = ancur;
	}

	/*
	 * The payload starts right behind the chunk header.  Go through a
	 * byte pointer: arithmetic on void pointers is not standard C.
	 */
	data = (unsigned char *)ancur + sizeof(*ancur) + ancur->icurr;
	ancur->icurr += cha->isize;
	return data;
}

/*
 * Release every chunk.  The store is reset to empty afterwards so that a
 * later cha_alloc() or a second cha_free() never touches freed memory.
 */
static void cha_free(chastore_t *cha)
{
	chanode_t *cur = cha->ancur;

	while (cur) {
		chanode_t *next = cur->next;

		free(cur);
		cur = next;
	}
	cha->ancur = NULL;
}
33+ #define HASH (v , shift ) (((unsigned int)(v) * GR_PRIME) >> (shift))
9034
/*
 * Hash-chain record describing one block of the reference buffer.
 * Both layouts interleaved here (bdrecord_t with next/fp/ptr, and
 * struct index with ptr/val/next) carry the same three things:
 *   ptr      - start of the block inside the reference buffer
 *   fp / val - adler32 checksum computed over that block
 *   next     - next record that landed in the same hash bucket
 */
91- typedef struct s_bdrecord {
92- struct s_bdrecord * next ;
93- unsigned int fp ;
35+ struct index {
9436 const unsigned char * ptr ;
95- } bdrecord_t ;
37+ unsigned int val ;
38+ struct index * next ;
39+ };
9640
/*
 * Index state filled in by delta_prepare():
 *   cha     - chunk allocator the bdrecord_t records are taken from
 *   fphbits - bit count handed to HASH() when picking a bucket
 *   fphash  - bucket array of record chains
 */
97- typedef struct s_bdfile {
98- chastore_t cha ;
99- unsigned int fphbits ;
100- bdrecord_t * * fphash ;
101- } bdfile_t ;
102-
/*
 * Build a hash index over the reference buffer `buf`, one record per
 * BLK_SIZE block, keyed by the adler32 checksum of the block.
 *
 * Two implementations are interleaved below.  delta_prepare() fills a
 * caller-supplied bdfile_t and returns 0 on success or -1 when an
 * allocation fails; delta_cleanup() releases what it built.
 * delta_index() returns the bucket array (NULL when malloc() fails) and
 * stores the shift amount to pass to HASH() through `hash_shift`.
 */
103- static int delta_prepare (const unsigned char * buf , int bufsize , bdfile_t * bdf )
41+ static struct index * * delta_index (const unsigned char * buf ,
42+ unsigned long bufsize ,
43+ unsigned int * hash_shift )
10444{
105- unsigned int fphbits ;
106- int i , hsize ;
107- const unsigned char * data , * top ;
108- bdrecord_t * brec ;
109- bdrecord_t * * fphash ;
110-
111- fphbits = hashbits (bufsize / BLK_SIZE + 1 );
112- hsize = 1 << fphbits ;
113- fphash = malloc (hsize * sizeof (bdrecord_t * ));
114- if (!fphash )
115- return -1 ;
116- for (i = 0 ; i < hsize ; i ++ )
117- fphash [i ] = NULL ;
118- cha_init (& bdf -> cha , sizeof (bdrecord_t ), hsize / 4 + 1 );
119-
120- top = buf + bufsize ;
121- data = buf + (bufsize / BLK_SIZE ) * BLK_SIZE ;
122- if (data == top )
45+ unsigned int hsize , hshift , entries , blksize , i ;
46+ const unsigned char * data ;
47+ struct index * entry , * * hash ;
48+ void * mem ;
49+
50+ /* determine index hash size */
/*
 * `entries` counts blocks, rounding up so that a short trailing block is
 * included.  The bucket count is the first power of two, starting at 2^4
 * and capped at 2^16, that is not below entries / 4.  HASH() keeps the
 * top bits of a 32-bit product, hence the shift of 32 - i.
 */
51+ entries = (bufsize + BLK_SIZE - 1 ) / BLK_SIZE ;
52+ hsize = entries / 4 ;
53+ for (i = 4 ; (1 << i ) < hsize && i < 16 ; i ++ );
54+ hsize = 1 << i ;
55+ hshift = 32 - i ;
56+ * hash_shift = hshift ;
57+
58+ /* allocate lookup index */
/*
 * A single allocation holds the bucket array followed by the entry
 * array; the returned pointer is the start of that allocation, so one
 * free() on it releases both.
 * NOTE(review): `mem + ...` below is arithmetic on a void pointer, which
 * is a compiler extension rather than standard C.
 */
59+ mem = malloc (hsize * sizeof (* hash ) + entries * sizeof (* entry ));
60+ if (!mem )
61+ return NULL ;
62+ hash = mem ;
63+ entry = mem + hsize * sizeof (* hash );
64+ memset (hash , 0 , hsize * sizeof (* hash ));
65+
66+ /* then populate it */
/*
 * Start at the last block, whose size is whatever remains of the buffer,
 * and walk backwards in BLK_SIZE steps.  Each entry is pushed on the
 * front of its bucket, so within a chain the blocks nearest the start
 * of `buf` end up first.
 * NOTE(review): the loop ends by stepping `data` below `buf` and then
 * comparing the two pointers - confirm this is acceptable on all targets.
 */
67+ data = buf + entries * BLK_SIZE - BLK_SIZE ;
68+ blksize = bufsize - (data - buf );
69+ while (data >= buf ) {
70+ unsigned int val = adler32 (0 , data , blksize );
71+ i = HASH (val , hshift );
72+ entry -> ptr = data ;
73+ entry -> val = val ;
74+ entry -> next = hash [i ];
75+ hash [i ] = entry ++ ;
76+ blksize = BLK_SIZE ;
12377 data -= BLK_SIZE ;
78+ }
12479
125- for ( ; data >= buf ; data -= BLK_SIZE ) {
126- brec = cha_alloc (& bdf -> cha );
127- if (!brec ) {
128- cha_free (& bdf -> cha );
129- free (fphash );
130- return -1 ;
131- }
132- brec -> fp = adler32 (0 , data , MIN (BLK_SIZE , top - data ));
133- brec -> ptr = data ;
134- i = HASH (brec -> fp , fphbits );
135- brec -> next = fphash [i ];
136- fphash [i ] = brec ;
137- }
138-
139- bdf -> fphbits = fphbits ;
140- bdf -> fphash = fphash ;
141-
142- return 0 ;
143- }
144-
145- static void delta_cleanup (bdfile_t * bdf )
146- {
147- free (bdf -> fphash );
148- cha_free (& bdf -> cha );
80+ return hash ;
14981}
15082
15183/* provide the size of the copy opcode given the block offset and size */
@@ -161,23 +93,24 @@ void *diff_delta(void *from_buf, unsigned long from_size,
16193 unsigned long * delta_size ,
16294 unsigned long max_size )
16395{
164- unsigned int i , outpos , outsize , inscnt , csize , msize , moff ;
165- unsigned int fp ;
166- const unsigned char * ref_data , * ref_top , * data , * top , * ptr1 , * ptr2 ;
167- unsigned char * out , * orig ;
168- bdrecord_t * brec ;
169- bdfile_t bdf ;
96+ unsigned int i , outpos , outsize , inscnt , hash_shift ;
97+ const unsigned char * ref_data , * ref_top , * data , * top ;
98+ unsigned char * out ;
99+ struct index * entry , * * hash ;
170100
171- if (!from_size || !to_size || delta_prepare ( from_buf , from_size , & bdf ) )
101+ if (!from_size || !to_size )
172102 return NULL ;
173-
103+ hash = delta_index (from_buf , from_size , & hash_shift );
104+ if (!hash )
105+ return NULL ;
106+
174107 outpos = 0 ;
175108 outsize = 8192 ;
176109 if (max_size && outsize >= max_size )
177110 outsize = max_size + MAX_OP_SIZE + 1 ;
178111 out = malloc (outsize );
179112 if (!out ) {
180- delta_cleanup ( & bdf );
113+ free ( hash );
181114 return NULL ;
182115 }
183116
@@ -205,28 +138,32 @@ void *diff_delta(void *from_buf, unsigned long from_size,
205138 }
206139
207140 inscnt = 0 ;
208- moff = 0 ;
209- while (data < top ) {
210- msize = 0 ;
211- fp = adler32 (0 , data , MIN (top - data , BLK_SIZE ));
212- i = HASH (fp , bdf .fphbits );
213- for (brec = bdf .fphash [i ]; brec ; brec = brec -> next ) {
214- if (brec -> fp == fp ) {
215- csize = ref_top - brec -> ptr ;
216- if (csize > top - data )
217- csize = top - data ;
218- for (ptr1 = brec -> ptr , ptr2 = data ;
219- csize && * ptr1 == * ptr2 ;
220- csize -- , ptr1 ++ , ptr2 ++ );
221141
222- csize = ptr1 - brec -> ptr ;
223- if (csize > msize ) {
224- moff = brec -> ptr - ref_data ;
225- msize = csize ;
226- if (msize >= 0x10000 ) {
227- msize = 0x10000 ;
228- break ;
229- }
142+ while (data < top ) {
143+ unsigned int moff = 0 , msize = 0 ;
144+ unsigned int blksize = MIN (top - data , BLK_SIZE );
145+ unsigned int val = adler32 (0 , data , blksize );
146+ i = HASH (val , hash_shift );
147+ for (entry = hash [i ]; entry ; entry = entry -> next ) {
148+ const unsigned char * ref = entry -> ptr ;
149+ const unsigned char * src = data ;
150+ unsigned int ref_size = ref_top - ref ;
151+ if (entry -> val != val )
152+ continue ;
153+ if (ref_size > top - src )
154+ ref_size = top - src ;
155+ while (ref_size && * src ++ == * ref ) {
156+ ref ++ ;
157+ ref_size -- ;
158+ }
159+ ref_size = ref - entry -> ptr ;
160+ if (ref_size > msize ) {
161+ /* this is our best match so far */
162+ moff = entry -> ptr - ref_data ;
163+ msize = ref_size ;
164+ if (msize >= 0x10000 ) {
165+ msize = 0x10000 ;
166+ break ;
230167 }
231168 }
232169 }
@@ -241,13 +178,15 @@ void *diff_delta(void *from_buf, unsigned long from_size,
241178 inscnt = 0 ;
242179 }
243180 } else {
181+ unsigned char * op ;
182+
244183 if (inscnt ) {
245184 out [outpos - inscnt - 1 ] = inscnt ;
246185 inscnt = 0 ;
247186 }
248187
249188 data += msize ;
250- orig = out + outpos ++ ;
189+ op = out + outpos ++ ;
251190 i = 0x80 ;
252191
253192 if (moff & 0xff ) { out [outpos ++ ] = moff ; i |= 0x01 ; }
@@ -262,7 +201,7 @@ void *diff_delta(void *from_buf, unsigned long from_size,
262201 msize >>= 8 ;
263202 if (msize & 0xff ) { out [outpos ++ ] = msize ; i |= 0x20 ; }
264203
265- * orig = i ;
204+ * op = i ;
266205 }
267206
268207 if (outpos >= outsize - MAX_OP_SIZE ) {
@@ -276,7 +215,7 @@ void *diff_delta(void *from_buf, unsigned long from_size,
276215 out = realloc (out , outsize );
277216 if (!out ) {
278217 free (tmp );
279- delta_cleanup ( & bdf );
218+ free ( hash );
280219 return NULL ;
281220 }
282221 }
@@ -285,7 +224,7 @@ void *diff_delta(void *from_buf, unsigned long from_size,
285224 if (inscnt )
286225 out [outpos - inscnt - 1 ] = inscnt ;
287226
288- delta_cleanup ( & bdf );
227+ free ( hash );
289228 * delta_size = outpos ;
290229 return out ;
291230}
0 commit comments