Skip to content

Commit 731490b

Browse files
committed
Merge branch 'jk/xdiff-drop-xdl-fast-hash'
Retire the "fast hash" that had disastrous performance issues in some corner cases. * jk/xdiff-drop-xdl-fast-hash: xdiff: drop XDL_FAST_HASH
2 parents 06cd5a1 + 1f7c926 commit 731490b

File tree

3 files changed

+0
-118
lines changed

3 files changed

+0
-118
lines changed

Makefile

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -338,11 +338,6 @@ all::
338338
#
339339
# Define NATIVE_CRLF if your platform uses CRLF for line endings.
340340
#
341-
# Define XDL_FAST_HASH to use an alternative line-hashing method in
342-
# the diff algorithm. It gives a nice speedup if your processor has
343-
# fast unaligned word loads. Does NOT work on big-endian systems!
344-
# Enabled by default on x86_64.
345-
#
346341
# Define GIT_USER_AGENT if you want to change how git identifies itself during
347342
# network interactions. The default is "git/$(GIT_VERSION)".
348343
#
@@ -1485,10 +1480,6 @@ ifndef NO_MSGFMT_EXTENDED_OPTIONS
14851480
MSGFMT += --check --statistics
14861481
endif
14871482

1488-
ifneq (,$(XDL_FAST_HASH))
1489-
BASIC_CFLAGS += -DXDL_FAST_HASH
1490-
endif
1491-
14921483
ifdef GMTIME_UNRELIABLE_ERRORS
14931484
COMPAT_OBJS += compat/gmtime.o
14941485
BASIC_CFLAGS += -DGMTIME_UNRELIABLE_ERRORS

config.mak.uname

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,6 @@ endif
1717
# because maintaining the nesting to match is a pain. If
1818
# we had "elif" things would have been much nicer...
1919

20-
ifeq ($(uname_M),x86_64)
21-
XDL_FAST_HASH = YesPlease
22-
endif
2320
ifeq ($(uname_S),OSF1)
2421
# Need this for u_short definitions et al
2522
BASIC_CFLAGS += -D_OSF_SOURCE

xdiff/xutils.c

Lines changed: 0 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -264,110 +264,6 @@ static unsigned long xdl_hash_record_with_whitespace(char const **data,
264264
return ha;
265265
}
266266

267-
#ifdef XDL_FAST_HASH
268-
269-
#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))
270-
271-
#define ONEBYTES REPEAT_BYTE(0x01)
272-
#define NEWLINEBYTES REPEAT_BYTE(0x0a)
273-
#define HIGHBITS REPEAT_BYTE(0x80)
274-
275-
/* Return the high bit set in the first byte that is a zero */
276-
static inline unsigned long has_zero(unsigned long a)
277-
{
278-
return ((a - ONEBYTES) & ~a) & HIGHBITS;
279-
}
280-
281-
static inline long count_masked_bytes(unsigned long mask)
282-
{
283-
if (sizeof(long) == 8) {
284-
/*
285-
* Jan Achrenius on G+: microoptimized version of
286-
* the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
287-
* that works for the bytemasks without having to
288-
* mask them first.
289-
*/
290-
/*
291-
* return mask * 0x0001020304050608 >> 56;
292-
*
293-
* Doing it like this avoids warnings on 32-bit machines.
294-
*/
295-
long a = (REPEAT_BYTE(0x01) / 0xff + 1);
296-
return mask * a >> (sizeof(long) * 7);
297-
} else {
298-
/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
299-
/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
300-
long a = (0x0ff0001 + mask) >> 23;
301-
/* Fix the 1 for 00 case */
302-
return a & mask;
303-
}
304-
}
305-
306-
unsigned long xdl_hash_record(char const **data, char const *top, long flags)
307-
{
308-
unsigned long hash = 5381;
309-
unsigned long a = 0, mask = 0;
310-
char const *ptr = *data;
311-
char const *end = top - sizeof(unsigned long) + 1;
312-
313-
if (flags & XDF_WHITESPACE_FLAGS)
314-
return xdl_hash_record_with_whitespace(data, top, flags);
315-
316-
ptr -= sizeof(unsigned long);
317-
do {
318-
hash += hash << 5;
319-
hash ^= a;
320-
ptr += sizeof(unsigned long);
321-
if (ptr >= end)
322-
break;
323-
a = *(unsigned long *)ptr;
324-
/* Do we have any '\n' bytes in this word? */
325-
mask = has_zero(a ^ NEWLINEBYTES);
326-
} while (!mask);
327-
328-
if (ptr >= end) {
329-
/*
330-
* There is only a partial word left at the end of the
331-
* buffer. Because we may work with a memory mapping,
332-
* we have to grab the rest byte by byte instead of
333-
* blindly reading it.
334-
*
335-
* To avoid problems with masking in a signed value,
336-
* we use an unsigned char here.
337-
*/
338-
const char *p;
339-
for (p = top - 1; p >= ptr; p--)
340-
a = (a << 8) + *((const unsigned char *)p);
341-
mask = has_zero(a ^ NEWLINEBYTES);
342-
if (!mask)
343-
/*
344-
* No '\n' found in the partial word. Make a
345-
* mask that matches what we read.
346-
*/
347-
mask = 1UL << (8 * (top - ptr) + 7);
348-
}
349-
350-
/* The mask *below* the first high bit set */
351-
mask = (mask - 1) & ~mask;
352-
mask >>= 7;
353-
hash += hash << 5;
354-
hash ^= a & mask;
355-
356-
/* Advance past the last (possibly partial) word */
357-
ptr += count_masked_bytes(mask);
358-
359-
if (ptr < top) {
360-
assert(*ptr == '\n');
361-
ptr++;
362-
}
363-
364-
*data = ptr;
365-
366-
return hash;
367-
}
368-
369-
#else /* XDL_FAST_HASH */
370-
371267
unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
372268
unsigned long ha = 5381;
373269
char const *ptr = *data;
@@ -384,8 +280,6 @@ unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
384280
return ha;
385281
}
386282

387-
#endif /* XDL_FAST_HASH */
388-
389283
unsigned int xdl_hashbits(unsigned int size) {
390284
unsigned int val = 1, bits = 0;
391285

0 commit comments

Comments
 (0)