2424 * The length of the sequence is arbitrarily set to 8 for now.
2525 */
2626
27+ /* Wild guess at the initial hash size, given as log2 of the slot count (the table starts with 1 << 10 slots) */
28+ #define INITIAL_HASH_SIZE 10
2729#define HASHBASE 65537 /* next_prime(2^16) */
2830
29- static void hash_chars (unsigned char * buf , unsigned long sz , int * count )
/* One slot of the span hash table: a hash value and how often it was seen. */
31+ struct spanhash {
/* hash of a byte span, as computed by hash_chars() */
32+ unsigned long hashval ;
/* number of occurrences; zero marks the slot as unused */
33+ unsigned long cnt ;
34+ };
/* Header of a linear-probing hash table, immediately followed by its slots. */
35+ struct spanhash_top {
/* log2 of the number of slots in data[] */
36+ int alloc_log2 ;
/* insertions still allowed before add_spanhash() grows the table */
37+ int free ;
/* the slots themselves; (1 << alloc_log2) entries are allocated */
38+ struct spanhash data [FLEX_ARRAY ];
39+ };
40+
41+ static struct spanhash * spanhash_find (struct spanhash_top * top , unsigned long hashval )
42+ {
43+ int sz = 1 << top -> alloc_log2 ;
44+ int bucket = hashval & (sz - 1 );
45+ while (1 ) {
46+ struct spanhash * h = & (top -> data [bucket ++ ]);
47+ if (!h -> cnt )
48+ return NULL ;
49+ if (h -> hashval == hashval )
50+ return h ;
51+ if (sz <= bucket )
52+ bucket = 0 ;
53+ }
54+ }
55+
56+ static struct spanhash_top * spanhash_rehash (struct spanhash_top * orig )
57+ {
58+ struct spanhash_top * new ;
59+ int i ;
60+ int osz = 1 << orig -> alloc_log2 ;
61+ int sz = osz << 1 ;
62+
63+ new = xmalloc (sizeof (* orig ) + sizeof (struct spanhash ) * sz );
64+ new -> alloc_log2 = orig -> alloc_log2 + 1 ;
65+ new -> free = osz ;
66+ memset (new -> data , 0 , sizeof (struct spanhash ) * sz );
67+ for (i = 0 ; i < osz ; i ++ ) {
68+ struct spanhash * o = & (orig -> data [i ]);
69+ int bucket ;
70+ if (!o -> cnt )
71+ continue ;
72+ bucket = o -> hashval & (sz - 1 );
73+ while (1 ) {
74+ struct spanhash * h = & (new -> data [bucket ++ ]);
75+ if (!h -> cnt ) {
76+ h -> hashval = o -> hashval ;
77+ h -> cnt = o -> cnt ;
78+ new -> free -- ;
79+ break ;
80+ }
81+ if (sz <= bucket )
82+ bucket = 0 ;
83+ }
84+ }
85+ free (orig );
86+ return new ;
87+ }
88+
89+ static struct spanhash_top * add_spanhash (struct spanhash_top * top ,
90+ unsigned long hashval )
91+ {
92+ int bucket , lim ;
93+ struct spanhash * h ;
94+
95+ lim = (1 << top -> alloc_log2 );
96+ bucket = hashval & (lim - 1 );
97+ while (1 ) {
98+ h = & (top -> data [bucket ++ ]);
99+ if (!h -> cnt ) {
100+ h -> hashval = hashval ;
101+ h -> cnt = 1 ;
102+ top -> free -- ;
103+ if (top -> free < 0 )
104+ return spanhash_rehash (top );
105+ return top ;
106+ }
107+ if (h -> hashval == hashval ) {
108+ h -> cnt ++ ;
109+ return top ;
110+ }
111+ if (lim <= bucket )
112+ bucket = 0 ;
113+ }
114+ }
115+
/*
 * Build a span hash table for the sz bytes at buf: one hash value is
 * computed per input byte and recorded with add_spanhash().
 * Returns the allocated table.
 * NOTE(review): part of this function is hidden behind the hunk header
 * below, so the accumulator initialisation is not visible here.
 */
116+ static struct spanhash_top * hash_chars (unsigned char * buf , unsigned long sz )
30117{
31- unsigned int accum1 , accum2 , i ;
118+ int i ;
/*
 * NOTE(review): the accumulators are now unsigned long, but the loop
 * below still uses "accum2 >> 24", which extracts exactly the top byte
 * only when the type is 32 bits wide.  Where unsigned long is wider the
 * hash values presumably differ from the 32-bit ones -- confirm.
 */
119+ unsigned long accum1 , accum2 , hashval ;
120+ struct spanhash_top * hash ;
121+
/* INITIAL_HASH_SIZE is used as a log2: the table starts with 1 << i slots */
122+ i = INITIAL_HASH_SIZE ;
123+ hash = xmalloc (sizeof (* hash ) + sizeof (struct spanhash ) * (1 <<i ));
124+ hash -> alloc_log2 = i ;
/* allow half of the slots to be filled before the table grows */
125+ hash -> free = (1 <<i )/2 ;
126+ memset (hash -> data , 0 , sizeof (struct spanhash ) * (1 <<i ));
32127
33128 /* an 8-byte shift register made of accum1 and accum2. New
34129 * bytes come at LSB of accum2, and shifted up to accum1
@@ -40,44 +135,68 @@ static void hash_chars(unsigned char *buf, unsigned long sz, int *count)
40135 while (sz ) {
41136 accum1 = (accum1 << 8 ) | (accum2 >> 24 );
42137 accum2 = (accum2 << 8 ) | * buf ++ ;
43- /* We want something that hashes permuted byte
44- * sequences nicely; simpler hash like (accum1 ^
45- * accum2) does not perform as well.
46- */
47- i = (accum1 + accum2 * 0x61 ) % HASHBASE ;
48- count [i ]++ ;
138+ hashval = (accum1 + accum2 * 0x61 ) % HASHBASE ;
/* add_spanhash() may hand back a different (grown) table, so keep its result */
139+ hash = add_spanhash (hash , hashval );
49140 sz -- ;
50141 }
142+ return hash ;
51143}
52144
53145int diffcore_count_changes (void * src , unsigned long src_size ,
54146 void * dst , unsigned long dst_size ,
147+ void * * src_count_p ,
148+ void * * dst_count_p ,
55149 unsigned long delta_limit ,
56150 unsigned long * src_copied ,
57151 unsigned long * literal_added )
58152{
59- int * src_count , * dst_count , i ;
153+ int i , ssz ;
154+ struct spanhash_top * src_count , * dst_count ;
60155 unsigned long sc , la ;
61156
62157 if (src_size < 8 || dst_size < 8 )
63158 return -1 ;
64159
65- src_count = xcalloc (HASHBASE * 2 , sizeof (int ));
66- dst_count = src_count + HASHBASE ;
67- hash_chars (src , src_size , src_count );
68- hash_chars (dst , dst_size , dst_count );
69-
160+ src_count = dst_count = NULL ;
161+ if (src_count_p )
162+ src_count = * src_count_p ;
163+ if (!src_count ) {
164+ src_count = hash_chars (src , src_size );
165+ if (src_count_p )
166+ * src_count_p = src_count ;
167+ }
168+ if (dst_count_p )
169+ dst_count = * dst_count_p ;
170+ if (!dst_count ) {
171+ dst_count = hash_chars (dst , dst_size );
172+ if (dst_count_p )
173+ * dst_count_p = dst_count ;
174+ }
70175 sc = la = 0 ;
71- for (i = 0 ; i < HASHBASE ; i ++ ) {
72- if (src_count [i ] < dst_count [i ]) {
73- la += dst_count [i ] - src_count [i ];
74- sc += src_count [i ];
176+
177+ ssz = 1 << src_count -> alloc_log2 ;
178+ for (i = 0 ; i < ssz ; i ++ ) {
179+ struct spanhash * s = & (src_count -> data [i ]);
180+ struct spanhash * d ;
181+ unsigned dst_cnt , src_cnt ;
182+ if (!s -> cnt )
183+ continue ;
184+ src_cnt = s -> cnt ;
185+ d = spanhash_find (dst_count , s -> hashval );
186+ dst_cnt = d ? d -> cnt : 0 ;
187+ if (src_cnt < dst_cnt ) {
188+ la += dst_cnt - src_cnt ;
189+ sc += src_cnt ;
75190 }
76- else /* i.e. if (dst_count[i] <= src_count[i]) */
77- sc += dst_count [ i ] ;
191+ else
192+ sc += dst_cnt ;
78193 }
194+
195+ if (!src_count_p )
196+ free (src_count );
197+ if (!dst_count_p )
198+ free (dst_count );
79199 * src_copied = sc ;
80200 * literal_added = la ;
81- free (src_count );
82201 return 0 ;
83202}
0 commit comments