Skip to content

Commit 2b6a541

Browse files
dscho authored and gitster committed
color-words: take an optional regular expression describing words
In some applications, words are not delimited by white space. To allow for that, you can specify a regular expression describing what makes a word with

    git diff --color-words='[A-Za-z0-9]+'

Note that words cannot contain newline characters.

As suggested by Thomas Rast, the words are the exact matches of the regular expression.

Note that a regular expression beginning with a '^' will match only a word at the beginning of the hunk, not a word at the beginning of a line, and is probably not what you want.

This commit contains a quoting fix by Thomas Rast.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 2e5d200 commit 2b6a541

File tree

4 files changed

+118
-10
lines changed

4 files changed

+118
-10
lines changed

Documentation/diff-options.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,12 @@ endif::git-format-patch[]
9191
Turn off colored diff, even when the configuration file
9292
gives the default to color output.
9393

94-
--color-words::
94+
--color-words[=regex]::
9595
Show colored word diff, i.e. color words which have changed.
96+
+
97+
Optionally, you can pass a regular expression that tells Git what the
98+
words are that you are looking for. The default is to interpret any
99+
stretch of non-whitespace as a word.
96100

97101
--no-renames::
98102
Turn off rename detection, even when the configuration

diff.c

Lines changed: 55 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -333,12 +333,14 @@ static void diff_words_append(char *line, unsigned long len,
333333
len--;
334334
memcpy(buffer->text.ptr + buffer->text.size, line, len);
335335
buffer->text.size += len;
336+
buffer->text.ptr[buffer->text.size] = '\0';
336337
}
337338

338339
/* Per-diff state used while producing a colored word diff. */
struct diff_words_data {
339340
/* text collected for the removed ("minus") and added ("plus") sides */
struct diff_words_buffer minus, plus;
340341
/* position in the plus text; fn_out_diff_words_aux sets it to plus_end */
const char *current_plus;
341342
/* copied from the diff options' "file" member in builtin_diff; presumably the output stream */
FILE *file;
343+
/*
 * Compiled word regex, allocated in builtin_diff only when the
 * options carry a word_regex string; otherwise it stays NULL (the
 * struct comes from xcalloc) and words are split on whitespace.
 */
regex_t *word_regex;
342344
};
343345

344346
static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
@@ -382,29 +384,58 @@ static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
382384
diff_words->current_plus = plus_end;
383385
}
384386

387+
/* This function starts looking at *begin, and returns 0 iff a word was found. */
388+
static int find_word_boundaries(mmfile_t *buffer, regex_t *word_regex,
389+
int *begin, int *end)
390+
{
391+
if (word_regex && *begin < buffer->size) {
392+
regmatch_t match[1];
393+
if (!regexec(word_regex, buffer->ptr + *begin, 1, match, 0)) {
394+
char *p = memchr(buffer->ptr + *begin + match[0].rm_so,
395+
'\n', match[0].rm_eo - match[0].rm_so);
396+
*end = p ? p - buffer->ptr : match[0].rm_eo + *begin;
397+
*begin += match[0].rm_so;
398+
return *begin >= *end;
399+
}
400+
return -1;
401+
}
402+
403+
/* find the next word */
404+
while (*begin < buffer->size && isspace(buffer->ptr[*begin]))
405+
(*begin)++;
406+
if (*begin >= buffer->size)
407+
return -1;
408+
409+
/* find the end of the word */
410+
*end = *begin + 1;
411+
while (*end < buffer->size && !isspace(buffer->ptr[*end]))
412+
(*end)++;
413+
414+
return 0;
415+
}
416+
385417
/*
386418
* This function splits the words in buffer->text, stores the list with
387419
* newline separator into out, and saves the offsets of the original words
388420
* in buffer->orig.
389421
*/
390-
static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out)
422+
static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out,
423+
regex_t *word_regex)
391424
{
392425
int i, j;
426+
long alloc = 0;
393427

394428
out->size = 0;
395-
out->ptr = xmalloc(buffer->text.size);
429+
out->ptr = NULL;
396430

397431
/* fake an empty "0th" word */
398432
ALLOC_GROW(buffer->orig, 1, buffer->orig_alloc);
399433
buffer->orig[0].begin = buffer->orig[0].end = buffer->text.ptr;
400434
buffer->orig_nr = 1;
401435

402436
for (i = 0; i < buffer->text.size; i++) {
403-
if (isspace(buffer->text.ptr[i]))
404-
continue;
405-
for (j = i + 1; j < buffer->text.size &&
406-
!isspace(buffer->text.ptr[j]); j++)
407-
; /* find the end of the word */
437+
if (find_word_boundaries(&buffer->text, word_regex, &i, &j))
438+
return;
408439

409440
/* store original boundaries */
410441
ALLOC_GROW(buffer->orig, buffer->orig_nr + 1,
@@ -414,6 +445,7 @@ static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out)
414445
buffer->orig_nr++;
415446

416447
/* store one word */
448+
ALLOC_GROW(out->ptr, out->size + j - i + 1, alloc);
417449
memcpy(out->ptr + out->size, buffer->text.ptr + i, j - i);
418450
out->ptr[out->size + j - i] = '\n';
419451
out->size += j - i + 1;
@@ -443,9 +475,10 @@ static void diff_words_show(struct diff_words_data *diff_words)
443475

444476
memset(&xpp, 0, sizeof(xpp));
445477
memset(&xecfg, 0, sizeof(xecfg));
446-
diff_words_fill(&diff_words->minus, &minus);
447-
diff_words_fill(&diff_words->plus, &plus);
478+
diff_words_fill(&diff_words->minus, &minus, diff_words->word_regex);
479+
diff_words_fill(&diff_words->plus, &plus, diff_words->word_regex);
448480
xpp.flags = XDF_NEED_MINIMAL;
481+
/* as only the hunk header will be parsed, we need a 0-context */
449482
xecfg.ctxlen = 0;
450483
xdi_diff_outf(&minus, &plus, fn_out_diff_words_aux, diff_words,
451484
&xpp, &xecfg, &ecb);
@@ -484,6 +517,7 @@ static void free_diff_words_data(struct emit_callback *ecbdata)
484517
free (ecbdata->diff_words->minus.orig);
485518
free (ecbdata->diff_words->plus.text.ptr);
486519
free (ecbdata->diff_words->plus.orig);
520+
free(ecbdata->diff_words->word_regex);
487521
free(ecbdata->diff_words);
488522
ecbdata->diff_words = NULL;
489523
}
@@ -1506,6 +1540,14 @@ static void builtin_diff(const char *name_a,
15061540
ecbdata.diff_words =
15071541
xcalloc(1, sizeof(struct diff_words_data));
15081542
ecbdata.diff_words->file = o->file;
1543+
if (o->word_regex) {
1544+
ecbdata.diff_words->word_regex = (regex_t *)
1545+
xmalloc(sizeof(regex_t));
1546+
if (regcomp(ecbdata.diff_words->word_regex,
1547+
o->word_regex, REG_EXTENDED))
1548+
die ("Invalid regular expression: %s",
1549+
o->word_regex);
1550+
}
15091551
}
15101552
xdi_diff_outf(&mf1, &mf2, fn_out_consume, &ecbdata,
15111553
&xpp, &xecfg, &ecb);
@@ -2517,6 +2559,10 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
25172559
DIFF_OPT_CLR(options, COLOR_DIFF);
25182560
else if (!strcmp(arg, "--color-words"))
25192561
options->flags |= DIFF_OPT_COLOR_DIFF | DIFF_OPT_COLOR_DIFF_WORDS;
2562+
else if (!prefixcmp(arg, "--color-words=")) {
2563+
options->flags |= DIFF_OPT_COLOR_DIFF | DIFF_OPT_COLOR_DIFF_WORDS;
2564+
options->word_regex = arg + 14;
2565+
}
25202566
else if (!strcmp(arg, "--exit-code"))
25212567
DIFF_OPT_SET(options, EXIT_WITH_STATUS);
25222568
else if (!strcmp(arg, "--quiet"))

diff.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ struct diff_options {
9898

9999
int stat_width;
100100
int stat_name_width;
101+
const char *word_regex;
101102

102103
/* this is set by diffcore for DIFF_FORMAT_PATCH */
103104
int found_changes;

t/t4034-diff-words.sh

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,4 +63,61 @@ test_expect_success 'word diff with runs of whitespace' '
6363
6464
'
6565

66+
cat > expect <<\EOF
67+
<WHITE>diff --git a/pre b/post<RESET>
68+
<WHITE>index 330b04f..5ed8eff 100644<RESET>
69+
<WHITE>--- a/pre<RESET>
70+
<WHITE>+++ b/post<RESET>
71+
<BROWN>@@ -1,3 +1,7 @@<RESET>
72+
h(4),<GREEN>hh<RESET>[44]
73+
<RESET>
74+
a = b + c<RESET>
75+
76+
<GREEN>aa = a<RESET>
77+
78+
<GREEN>aeff = aeff * ( aaa<RESET> )
79+
EOF
80+
81+
test_expect_success 'word diff with a regular expression' '
82+
83+
word_diff --color-words="[a-z]+"
84+
85+
'
86+
87+
echo 'aaa (aaa)' > pre
88+
echo 'aaa (aaa) aaa' > post
89+
90+
cat > expect <<\EOF
91+
<WHITE>diff --git a/pre b/post<RESET>
92+
<WHITE>index c29453b..be22f37 100644<RESET>
93+
<WHITE>--- a/pre<RESET>
94+
<WHITE>+++ b/post<RESET>
95+
<BROWN>@@ -1 +1 @@<RESET>
96+
aaa (aaa) <GREEN>aaa<RESET>
97+
EOF
98+
99+
test_expect_success 'test parsing words for newline' '
100+
101+
word_diff --color-words="a+"
102+
103+
'
104+
105+
echo '(:' > pre
106+
echo '(' > post
107+
108+
cat > expect <<\EOF
109+
<WHITE>diff --git a/pre b/post<RESET>
110+
<WHITE>index 289cb9d..2d06f37 100644<RESET>
111+
<WHITE>--- a/pre<RESET>
112+
<WHITE>+++ b/post<RESET>
113+
<BROWN>@@ -1 +1 @@<RESET>
114+
(<RED>:<RESET>
115+
EOF
116+
117+
test_expect_success 'test when words are only removed at the end' '
118+
119+
word_diff --color-words=.
120+
121+
'
122+
66123
test_done

0 commit comments

Comments
 (0)