Skip to content

Commit c436eb8

Browse files
author
Junio C Hamano
committed
diff-delta: cull collided hash bucket more aggressively.
This tries to limit the size of collided hash buckets by removing entries that share an identical three-byte prefix with an entry already kept in the same hash bucket.
1 parent d00e0f8 commit c436eb8

File tree

1 file changed

+25
-12
lines changed

1 file changed

+25
-12
lines changed

diff-delta.c

Lines changed: 25 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -88,22 +88,35 @@ static struct index ** delta_index(const unsigned char *buf,
8888

8989
/*
9090
* Now make sure none of the hash buckets has more entries than
91-
* we're willing to test. Otherwise we short-circuit the entry
92-
* list uniformly to still preserve a good repartition across
93-
* the reference buffer.
91+
* we're willing to test. Otherwise we cull the entry list to
92+
* limit identical three byte prefixes to still preserve a good
93+
* repartition across the reference buffer.
9494
*/
9595
for (i = 0; i < hsize; i++) {
96+
struct index **list, *bucket, *remaining;
97+
int cnt;
9698
if (hash_count[i] < hlimit)
9799
continue;
98-
entry = hash[i];
99-
do {
100-
struct index *keep = entry;
101-
int skip = hash_count[i] / hlimit / 2;
102-
do {
103-
entry = entry->next;
104-
} while(--skip && entry);
105-
keep->next = entry;
106-
} while(entry);
100+
101+
bucket = NULL;
102+
list = &bucket;
103+
remaining = hash[i];
104+
cnt = 0;
105+
while (cnt < hlimit && remaining) {
106+
struct index *this = remaining, *that;
107+
remaining = remaining->next;
108+
for (that = bucket; that; that = that->next) {
109+
if (!memcmp(that->ptr, this->ptr, 3))
110+
break;
111+
}
112+
if (that)
113+
continue; /* discard */
114+
cnt++;
115+
*list = this;
116+
list = &(this->next);
117+
this->next = NULL;
118+
}
119+
hash[i] = bucket;
107120
}
108121
free(hash_count);
109122

0 commit comments

Comments
 (0)