|
1 | 1 | #include "cache.h" |
2 | 2 | #include "diff.h" |
3 | 3 | #include "diffcore.h" |
4 | | -#include "delta.h" |
5 | | -#include "count-delta.h" |
6 | | - |
7 | | -static int diffcore_count_changes_1(void *src, unsigned long src_size, |
8 | | - void *dst, unsigned long dst_size, |
9 | | - unsigned long delta_limit, |
10 | | - unsigned long *src_copied, |
11 | | - unsigned long *literal_added) |
12 | | -{ |
13 | | - void *delta; |
14 | | - unsigned long delta_size; |
15 | | - |
16 | | - delta = diff_delta(src, src_size, |
17 | | - dst, dst_size, |
18 | | - &delta_size, delta_limit); |
19 | | - if (!delta) |
20 | | - /* If delta_limit is exceeded, we have too much differences */ |
21 | | - return -1; |
22 | 4 |
|
23 | | - /* Estimate the edit size by interpreting delta. */ |
24 | | - if (count_delta(delta, delta_size, src_copied, literal_added)) { |
25 | | - free(delta); |
26 | | - return -1; |
/*
 * One record per "extended line" of a buffer.  hash_lines() builds an
 * array of these and closes it with a record whose fields are both 0.
 */
struct linehash {
	unsigned long bytes;	/* number of bytes the extended line spans */
	unsigned long hash;	/* value returned by hash_extended_line() */
};
| 9 | + |
/*
 * Hash one "extended line" starting at *buf_p, reading at most `left`
 * bytes.
 *
 * An extended line is zero or more whitespace bytes (anything <= ' ',
 * LF included), followed by one non-whitespace byte, followed by zero
 * or more non-LF bytes, and terminated by a LF or by running out of
 * input.
 *
 * Only bytes greater than ' ' contribute to the hash: the first one
 * seeds it, each later one is folded in as hash = hash * 11 + byte.
 *
 * On return *buf_p points just past the consumed bytes and the hash is
 * returned (0 if no non-whitespace byte was seen).  If a NUL byte is
 * met, the input is treated as binary: *buf_p is set to NULL and 0 is
 * returned.
 */
static unsigned long hash_extended_line(const unsigned char **buf_p,
					unsigned long left)
{
	const unsigned char *cursor = *buf_p;
	const unsigned char *limit = cursor + left;
	unsigned long digest = 0;
	int seeded = 0;	/* set once the first non-whitespace byte is seen */

	while (cursor < limit) {
		unsigned ch = *cursor++;

		if (ch == 0) {
			/* NUL byte: report binary content to the caller */
			*buf_p = NULL;
			return 0;
		}
		if (!seeded) {
			/* still skipping leading whitespace, LF included */
			if (ch > ' ') {
				digest = ch;
				seeded = 1;
			}
			continue;
		}
		if (ch == '\n')
			break;
		if (ch > ' ')
			digest = digest * 11 + ch;
	}

	*buf_p = cursor;
	return digest;
}
| 47 | + |
| 48 | +static int linehash_compare(const void *a_, const void *b_) |
| 49 | +{ |
| 50 | + struct linehash *a = (struct linehash *) a_; |
| 51 | + struct linehash *b = (struct linehash *) b_; |
| 52 | + if (a->hash < b->hash) return -1; |
| 53 | + if (a->hash > b->hash) return 1; |
29 | 54 | return 0; |
30 | 55 | } |
31 | 56 |
|
| 57 | +static struct linehash *hash_lines(const unsigned char *buf, |
| 58 | + unsigned long size) |
| 59 | +{ |
| 60 | + const unsigned char *eobuf = buf + size; |
| 61 | + struct linehash *line = NULL; |
| 62 | + int alloc = 0, used = 0; |
| 63 | + |
| 64 | + while (buf < eobuf) { |
| 65 | + const unsigned char *ptr = buf; |
| 66 | + unsigned long hash = hash_extended_line(&buf, eobuf-ptr); |
| 67 | + if (!buf) { |
| 68 | + free(line); |
| 69 | + return NULL; |
| 70 | + } |
| 71 | + if (alloc <= used) { |
| 72 | + alloc = alloc_nr(alloc); |
| 73 | + line = xrealloc(line, sizeof(*line) * alloc); |
| 74 | + } |
| 75 | + line[used].bytes = buf - ptr; |
| 76 | + line[used].hash = hash; |
| 77 | + used++; |
| 78 | + } |
| 79 | + qsort(line, used, sizeof(*line), linehash_compare); |
| 80 | + |
| 81 | + /* Terminate the list */ |
| 82 | + if (alloc <= used) |
| 83 | + line = xrealloc(line, sizeof(*line) * (used+1)); |
| 84 | + line[used].bytes = line[used].hash = 0; |
| 85 | + return line; |
| 86 | +} |
| 87 | + |
32 | 88 | int diffcore_count_changes(void *src, unsigned long src_size, |
33 | 89 | void *dst, unsigned long dst_size, |
34 | 90 | unsigned long delta_limit, |
35 | 91 | unsigned long *src_copied, |
36 | 92 | unsigned long *literal_added) |
37 | 93 | { |
38 | | - return diffcore_count_changes_1(src, src_size, |
39 | | - dst, dst_size, |
40 | | - delta_limit, |
41 | | - src_copied, |
42 | | - literal_added); |
| 94 | + struct linehash *src_lines, *dst_lines; |
| 95 | + unsigned long sc, la; |
| 96 | + |
| 97 | + src_lines = hash_lines(src, src_size); |
| 98 | + if (!src_lines) |
| 99 | + return -1; |
| 100 | + dst_lines = hash_lines(dst, dst_size); |
| 101 | + if (!dst_lines) { |
| 102 | + free(src_lines); |
| 103 | + return -1; |
| 104 | + } |
| 105 | + sc = la = 0; |
| 106 | + while (src_lines->bytes && dst_lines->bytes) { |
| 107 | + int cmp = linehash_compare(src_lines, dst_lines); |
| 108 | + if (!cmp) { |
| 109 | + sc += src_lines->bytes; |
| 110 | + src_lines++; |
| 111 | + dst_lines++; |
| 112 | + continue; |
| 113 | + } |
| 114 | + if (cmp < 0) { |
| 115 | + src_lines++; |
| 116 | + continue; |
| 117 | + } |
| 118 | + la += dst_lines->bytes; |
| 119 | + dst_lines++; |
| 120 | + } |
| 121 | + while (dst_lines->bytes) { |
| 122 | + la += dst_lines->bytes; |
| 123 | + dst_lines++; |
| 124 | + } |
| 125 | + *src_copied = sc; |
| 126 | + *literal_added = la; |
| 127 | + return 0; |
43 | 128 | } |
0 commit comments