Skip to content

Commit 21536d0

Browse files
phillipwoodgitster
authored andcommitted
diff --color-moved-ws: modify allow-indentation-change
Currently diff --color-moved-ws=allow-indentation-change does not support indentation that contains a mix of tabs and spaces. For example in commit 546f70f ("convert.h: drop 'extern' from function declaration", 2018-06-30) the function parameters in the following lines are not colored as moved [1]. -extern int stream_filter(struct stream_filter *, - const char *input, size_t *isize_p, - char *output, size_t *osize_p); +int stream_filter(struct stream_filter *, + const char *input, size_t *isize_p, + char *output, size_t *osize_p); This commit changes the way the indentation is handled to track the visual size of the indentation rather than the characters in the indentation. This has the benefit that any whitespace errors do not interfer with the move detection (the whitespace errors will still be highlighted according to --ws-error-highlight). During the discussion of this feature there were concerns about the correct detection of indentation for python. However those concerns apply whether or not we're detecting moved lines so no attempt is made to determine if the indentation is 'pythonic'. [1] Note that before the commit to fix the erroneous coloring of moved lines each line was colored as a different block, since that commit they are uncolored. Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk> Reviewed-by: Stefan Beller <sbeller@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 7a4252c commit 21536d0

File tree

2 files changed

+130
-58
lines changed

2 files changed

+130
-58
lines changed

diff.c

Lines changed: 74 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -752,6 +752,8 @@ struct emitted_diff_symbol {
752752
const char *line;
753753
int len;
754754
int flags;
755+
int indent_off; /* Offset to first non-whitespace character */
756+
int indent_width; /* The visual width of the indentation */
755757
enum diff_symbol s;
756758
};
757759
#define EMITTED_DIFF_SYMBOL_INIT {NULL}
@@ -782,44 +784,68 @@ struct moved_entry {
782784
struct moved_entry *next_line;
783785
};
784786

785-
/**
786-
* The struct ws_delta holds white space differences between moved lines, i.e.
787-
* between '+' and '-' lines that have been detected to be a move.
788-
* The string contains the difference in leading white spaces, before the
789-
* rest of the line is compared using the white space config for move
790-
* coloring. The current_longer indicates if the first string in the
791-
* comparision is longer than the second.
792-
*/
793-
struct ws_delta {
794-
char *string;
795-
unsigned int current_longer : 1;
796-
};
797-
#define WS_DELTA_INIT { NULL, 0 }
798-
799787
struct moved_block {
800788
struct moved_entry *match;
801-
struct ws_delta wsd;
789+
int wsd; /* The whitespace delta of this block */
802790
};
803791

804792
static void moved_block_clear(struct moved_block *b)
805793
{
806-
FREE_AND_NULL(b->wsd.string);
807-
b->match = NULL;
794+
memset(b, 0, sizeof(*b));
808795
}
809796

810-
static int compute_ws_delta(const struct emitted_diff_symbol *a,
811-
const struct emitted_diff_symbol *b,
812-
struct ws_delta *out)
797+
static void fill_es_indent_data(struct emitted_diff_symbol *es)
813798
{
814-
const struct emitted_diff_symbol *longer = a->len > b->len ? a : b;
815-
const struct emitted_diff_symbol *shorter = a->len > b->len ? b : a;
816-
int d = longer->len - shorter->len;
799+
unsigned int off = 0;
800+
int width = 0, tab_width = es->flags & WS_TAB_WIDTH_MASK;
801+
const char *s = es->line;
802+
const int len = es->len;
803+
804+
/* skip any \v \f \r at start of indentation */
805+
while (s[off] == '\f' || s[off] == '\v' ||
806+
(s[off] == '\r' && off < len - 1))
807+
off++;
808+
809+
/* calculate the visual width of indentation */
810+
while(1) {
811+
if (s[off] == ' ') {
812+
width++;
813+
off++;
814+
} else if (s[off] == '\t') {
815+
width += tab_width - (width % tab_width);
816+
while (s[++off] == '\t')
817+
width += tab_width;
818+
} else {
819+
break;
820+
}
821+
}
822+
823+
es->indent_off = off;
824+
es->indent_width = width;
825+
}
826+
827+
static int compute_ws_delta(const struct emitted_diff_symbol *a,
828+
const struct emitted_diff_symbol *b,
829+
int *out)
830+
{
831+
int a_len = a->len,
832+
b_len = b->len,
833+
a_off = a->indent_off,
834+
a_width = a->indent_width,
835+
b_off = b->indent_off,
836+
b_width = b->indent_width;
837+
int delta;
838+
839+
if (a->s == DIFF_SYMBOL_PLUS)
840+
delta = a_width - b_width;
841+
else
842+
delta = b_width - a_width;
817843

818-
if (strncmp(longer->line + d, shorter->line, shorter->len))
844+
if (a_len - a_off != b_len - b_off ||
845+
memcmp(a->line + a_off, b->line + b_off, a_len - a_off))
819846
return 0;
820847

821-
out->string = xmemdupz(longer->line, d);
822-
out->current_longer = (a == longer);
848+
*out = delta;
823849

824850
return 1;
825851
}
@@ -835,8 +861,11 @@ static int cmp_in_block_with_wsd(const struct diff_options *o,
835861
const char *a = cur->es->line,
836862
*b = match->es->line,
837863
*c = l->line;
838-
const char *orig_a = a;
839-
int wslen;
864+
int a_off = cur->es->indent_off,
865+
a_width = cur->es->indent_width,
866+
c_off = l->indent_off,
867+
c_width = l->indent_width;
868+
int delta;
840869

841870
/*
842871
* We need to check if 'cur' is equal to 'match'. As those
@@ -850,35 +879,20 @@ static int cmp_in_block_with_wsd(const struct diff_options *o,
850879
if (al != bl)
851880
return 1;
852881

853-
if (!pmb->wsd.string)
854-
/*
855-
* The white space delta is not active? This can happen
856-
* when we exit early in this function.
857-
*/
858-
return 1;
859-
860882
/*
861-
* The indent changes of the block are known and stored in
862-
* pmb->wsd; however we need to check if the indent changes of the
863-
* current line are still the same as before.
864-
*
865-
* To do so we need to compare 'l' to 'cur', adjusting the
866-
* one of them for the white spaces, depending which was longer.
883+
* The indent changes of the block are known and stored in pmb->wsd;
884+
* however we need to check if the indent changes of the current line
885+
* match those of the current block and that the text of 'l' and 'cur'
886+
* after the indentation match.
867887
*/
888+
if (cur->es->s == DIFF_SYMBOL_PLUS)
889+
delta = a_width - c_width;
890+
else
891+
delta = c_width - a_width;
868892

869-
wslen = strlen(pmb->wsd.string);
870-
if (pmb->wsd.current_longer) {
871-
c += wslen;
872-
cl -= wslen;
873-
} else {
874-
a += wslen;
875-
al -= wslen;
876-
}
877-
878-
if (al != cl || memcmp(orig_a, b, bl) || memcmp(a, c, al))
879-
return 1;
880-
881-
return 0;
893+
return !(delta == pmb->wsd && al - a_off == cl - c_off &&
894+
!memcmp(a, b, al) && !
895+
memcmp(a + a_off, c + c_off, al - a_off));
882896
}
883897

884898
static int moved_entry_cmp(const void *hashmap_cmp_fn_data,
@@ -944,6 +958,9 @@ static void add_lines_to_move_detection(struct diff_options *o,
944958
continue;
945959
}
946960

961+
if (o->color_moved_ws_handling &
962+
COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
963+
fill_es_indent_data(&o->emitted_symbols->buf[n]);
947964
key = prepare_entry(o, n);
948965
if (prev_line && prev_line->es->s == o->emitted_symbols->buf[n].s)
949966
prev_line->next_line = key;
@@ -1022,8 +1039,7 @@ static int shrink_potential_moved_blocks(struct moved_block *pmb,
10221039

10231040
if (lp < pmb_nr && rp > -1 && lp < rp) {
10241041
pmb[lp] = pmb[rp];
1025-
pmb[rp].match = NULL;
1026-
pmb[rp].wsd.string = NULL;
1042+
memset(&pmb[rp], 0, sizeof(pmb[rp]));
10271043
rp--;
10281044
lp++;
10291045
}
@@ -1143,7 +1159,7 @@ static void mark_color_as_moved(struct diff_options *o,
11431159
&pmb[pmb_nr].wsd))
11441160
pmb[pmb_nr++].match = match;
11451161
} else {
1146-
pmb[pmb_nr].wsd.string = NULL;
1162+
pmb[pmb_nr].wsd = 0;
11471163
pmb[pmb_nr++].match = match;
11481164
}
11491165
}
@@ -1507,7 +1523,7 @@ static void emit_diff_symbol_from_struct(struct diff_options *o,
15071523
static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s,
15081524
const char *line, int len, unsigned flags)
15091525
{
1510-
struct emitted_diff_symbol e = {line, len, flags, s};
1526+
struct emitted_diff_symbol e = {line, len, flags, 0, 0, s};
15111527

15121528
if (o->emitted_symbols)
15131529
append_emitted_diff_symbol(o, &e);

t/t4015-diff-whitespace.sh

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1901,4 +1901,60 @@ test_expect_success 'compare whitespace delta incompatible with other space opti
19011901
test_i18ngrep allow-indentation-change err
19021902
'
19031903

1904+
test_expect_success 'compare mixed whitespace delta across moved blocks' '
1905+
1906+
git reset --hard &&
1907+
tr Q_ "\t " <<-EOF >text.txt &&
1908+
____Indented text to
1909+
_Q____be further indented by four spaces across
1910+
____Qseveral lines
1911+
QQ____These two lines have had their
1912+
____indentation reduced by four spaces
1913+
Qdifferent indentation change
1914+
____too short
1915+
EOF
1916+
1917+
git add text.txt &&
1918+
git commit -m "add text.txt" &&
1919+
1920+
tr Q_ "\t " <<-EOF >text.txt &&
1921+
QIndented text to
1922+
QQbe further indented by four spaces across
1923+
Q____several lines
1924+
Q_QThese two lines have had their
1925+
indentation reduced by four spaces
1926+
QQdifferent indentation change
1927+
__Qtoo short
1928+
EOF
1929+
1930+
git -c color.diff.whitespace="normal red" \
1931+
-c core.whitespace=space-before-tab \
1932+
diff --color --color-moved --ws-error-highlight=all \
1933+
--color-moved-ws=allow-indentation-change >actual.raw &&
1934+
grep -v "index" actual.raw | test_decode_color >actual &&
1935+
1936+
cat <<-\EOF >expected &&
1937+
<BOLD>diff --git a/text.txt b/text.txt<RESET>
1938+
<BOLD>--- a/text.txt<RESET>
1939+
<BOLD>+++ b/text.txt<RESET>
1940+
<CYAN>@@ -1,7 +1,7 @@<RESET>
1941+
<BOLD;MAGENTA>-<RESET><BOLD;MAGENTA> Indented text to<RESET>
1942+
<BOLD;MAGENTA>-<RESET><BRED> <RESET> <BOLD;MAGENTA> be further indented by four spaces across<RESET>
1943+
<BOLD;MAGENTA>-<RESET><BRED> <RESET> <BOLD;MAGENTA>several lines<RESET>
1944+
<BOLD;BLUE>-<RESET> <BOLD;BLUE> These two lines have had their<RESET>
1945+
<BOLD;BLUE>-<RESET><BOLD;BLUE> indentation reduced by four spaces<RESET>
1946+
<BOLD;MAGENTA>-<RESET> <BOLD;MAGENTA>different indentation change<RESET>
1947+
<RED>-<RESET><RED> too short<RESET>
1948+
<BOLD;CYAN>+<RESET> <BOLD;CYAN>Indented text to<RESET>
1949+
<BOLD;CYAN>+<RESET> <BOLD;CYAN>be further indented by four spaces across<RESET>
1950+
<BOLD;CYAN>+<RESET> <BOLD;CYAN> several lines<RESET>
1951+
<BOLD;YELLOW>+<RESET> <BRED> <RESET> <BOLD;YELLOW>These two lines have had their<RESET>
1952+
<BOLD;YELLOW>+<RESET><BOLD;YELLOW>indentation reduced by four spaces<RESET>
1953+
<BOLD;CYAN>+<RESET> <BOLD;CYAN>different indentation change<RESET>
1954+
<GREEN>+<RESET><BRED> <RESET> <GREEN>too short<RESET>
1955+
EOF
1956+
1957+
test_cmp expected actual
1958+
'
1959+
19041960
test_done

0 commit comments

Comments
 (0)