Skip to content

Commit 80c49c3

Browse files
trast authored and gitster committed
color-words: make regex configurable via attributes
Make the --color-words splitting regular expression configurable via the diff driver's 'wordregex' attribute. The user can then set the driver on a file in .gitattributes. If a regex is given on the command line, it overrides the driver's setting. We also provide built-in regexes for the languages that already had funcname patterns, and add an appropriate diff driver entry for C/++. (The patterns are designed to run UTF-8 sequences into a single chunk to make sure they remain readable.) Signed-off-by: Thomas Rast <trast@student.ethz.ch> Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent c4b252c commit 80c49c3

File tree

6 files changed

+135
-15
lines changed

6 files changed

+135
-15
lines changed

Documentation/diff-options.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,10 @@ differences. You may want to append `|[^[:space:]]` to your regular
102102
expression to make sure that it matches all non-whitespace characters.
103103
A match that contains a newline is silently truncated(!) at the
104104
newline.
105+
+
106+
The regex can also be set via a diff driver, see
107+
linkgit:gitattributes[1]; giving it explicitly overrides any diff
108+
driver setting.
105109

106110
--no-renames::
107111
Turn off rename detection, even when the configuration

Documentation/gitattributes.txt

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,8 @@ patterns are available:
317317

318318
- `bibtex` suitable for files with BibTeX coded references.
319319

320+
- `cpp` suitable for source code in the C and C++ languages.
321+
320322
- `html` suitable for HTML/XHTML documents.
321323

322324
- `java` suitable for source code in the Java language.
@@ -334,6 +336,25 @@ patterns are available:
334336
- `tex` suitable for source code for LaTeX documents.
335337

336338

339+
Customizing word diff
340+
^^^^^^^^^^^^^^^^^^^^^
341+
342+
You can customize the rules that `git diff --color-words` uses to
343+
split words in a line, by specifying an appropriate regular expression
344+
in the "diff.*.wordregex" configuration variable. For example, in TeX
345+
a backslash followed by a sequence of letters forms a command, but
346+
several such commands can be run together without intervening
347+
whitespace. To separate them, use a regular expression such as
348+
349+
------------------------
350+
[diff "tex"]
351+
wordregex = "\\\\[a-zA-Z]+|[{}]|\\\\.|[^\\{}[:space:]]+"
352+
------------------------
353+
354+
A built-in pattern is provided for all languages listed in the
355+
previous section.
356+
357+
337358
Performing text diffs of binary files
338359
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
339360

diff.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1380,6 +1380,12 @@ static const struct userdiff_funcname *diff_funcname_pattern(struct diff_filespe
13801380
return one->driver->funcname.pattern ? &one->driver->funcname : NULL;
13811381
}
13821382

1383+
/*
 * Return the word-splitting regex attached to the diff driver of `one`.
 *
 * The driver is loaded with diff_filespec_load_driver() first, then its
 * word_regex field is returned as-is.  The caller in builtin_diff() tests
 * the result with `!o->word_regex`, so a NULL return is treated as "this
 * driver has no word regex" and the other filespec is tried next.
 *
 * NOTE(review): one->driver is dereferenced without a check; this assumes
 * diff_filespec_load_driver() always leaves it non-NULL -- confirm.
 */
static const char *userdiff_word_regex(struct diff_filespec *one)
1384+
{
1385+
diff_filespec_load_driver(one);
1386+
return one->driver->word_regex;
1387+
}
1388+
13831389
void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b)
13841390
{
13851391
if (!options->a_prefix)
@@ -1540,6 +1546,10 @@ static void builtin_diff(const char *name_a,
15401546
ecbdata.diff_words =
15411547
xcalloc(1, sizeof(struct diff_words_data));
15421548
ecbdata.diff_words->file = o->file;
1549+
if (!o->word_regex)
1550+
o->word_regex = userdiff_word_regex(one);
1551+
if (!o->word_regex)
1552+
o->word_regex = userdiff_word_regex(two);
15431553
if (o->word_regex) {
15441554
ecbdata.diff_words->word_regex = (regex_t *)
15451555
xmalloc(sizeof(regex_t));

t/t4034-diff-words.sh

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,41 @@ test_expect_success 'word diff with a regular expression' '
8484
8585
'
8686

87+
test_expect_success 'set a diff driver' '
88+
git config diff.testdriver.wordregex "[^[:space:]]" &&
89+
cat <<EOF > .gitattributes
90+
pre diff=testdriver
91+
post diff=testdriver
92+
EOF
93+
'
94+
95+
test_expect_success 'option overrides default' '
96+
97+
word_diff --color-words="[a-z]+"
98+
99+
'
100+
101+
cat > expect <<\EOF
102+
<WHITE>diff --git a/pre b/post<RESET>
103+
<WHITE>index 330b04f..5ed8eff 100644<RESET>
104+
<WHITE>--- a/pre<RESET>
105+
<WHITE>+++ b/post<RESET>
106+
<BROWN>@@ -1,3 +1,7 @@<RESET>
107+
h(4)<GREEN>,hh[44]<RESET>
108+
<RESET>
109+
a = b + c<RESET>
110+
111+
<GREEN>aa = a<RESET>
112+
113+
<GREEN>aeff = aeff * ( aaa )<RESET>
114+
EOF
115+
116+
test_expect_success 'use default supplied by driver' '
117+
118+
word_diff --color-words
119+
120+
'
121+
87122
echo 'aaa (aaa)' > pre
88123
echo 'aaa (aaa) aaa' > post
89124

@@ -100,6 +135,7 @@ test_expect_success 'test parsing words for newline' '
100135
101136
word_diff --color-words="a+"
102137
138+
103139
'
104140

105141
echo '(:' > pre

userdiff.c

Lines changed: 63 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,35 +6,81 @@ static struct userdiff_driver *drivers;
66
static int ndrivers;
77
static int drivers_alloc;
88

9-
#define FUNCNAME(name, pattern) \
10-
{ name, NULL, -1, { pattern, REG_EXTENDED } }
9+
#define PATTERNS(name, pattern, wordregex) \
10+
{ name, NULL, -1, { pattern, REG_EXTENDED }, wordregex }
1111
static struct userdiff_driver builtin_drivers[] = {
12-
FUNCNAME("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$"),
13-
FUNCNAME("java",
12+
PATTERNS("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$",
13+
"[^<>= \t]+|[^[:space:]]|[\x80-\xff]+"),
14+
PATTERNS("java",
1415
"!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n"
15-
"^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$"),
16-
FUNCNAME("objc",
16+
"^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$",
17+
"[a-zA-Z_][a-zA-Z0-9_]*"
18+
"|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
19+
"|[-+*/<>%&^|=!]="
20+
"|--|\\+\\+|<<=?|>>>?=?|&&|\\|\\|"
21+
"|[^[:space:]]|[\x80-\xff]+"),
22+
PATTERNS("objc",
1723
/* Negate C statements that can look like functions */
1824
"!^[ \t]*(do|for|if|else|return|switch|while)\n"
1925
/* Objective-C methods */
2026
"^[ \t]*([-+][ \t]*\\([ \t]*[A-Za-z_][A-Za-z_0-9* \t]*\\)[ \t]*[A-Za-z_].*)$\n"
2127
/* C functions */
2228
"^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$\n"
2329
/* Objective-C class/protocol definitions */
24-
"^(@(implementation|interface|protocol)[ \t].*)$"),
25-
FUNCNAME("pascal",
30+
"^(@(implementation|interface|protocol)[ \t].*)$",
31+
/* -- */
32+
"[a-zA-Z_][a-zA-Z0-9_]*"
33+
"|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
34+
"|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->"
35+
"|[^[:space:]]|[\x80-\xff]+"),
36+
PATTERNS("pascal",
2637
"^((procedure|function|constructor|destructor|interface|"
2738
"implementation|initialization|finalization)[ \t]*.*)$"
2839
"\n"
29-
"^(.*=[ \t]*(class|record).*)$"),
30-
FUNCNAME("php", "^[\t ]*((function|class).*)"),
31-
FUNCNAME("python", "^[ \t]*((class|def)[ \t].*)$"),
32-
FUNCNAME("ruby", "^[ \t]*((class|module|def)[ \t].*)$"),
33-
FUNCNAME("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$"),
34-
FUNCNAME("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$"),
40+
"^(.*=[ \t]*(class|record).*)$",
41+
/* -- */
42+
"[a-zA-Z_][a-zA-Z0-9_]*"
43+
"|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+"
44+
"|<>|<=|>=|:=|\\.\\."
45+
"|[^[:space:]]|[\x80-\xff]+"),
46+
PATTERNS("php", "^[\t ]*((function|class).*)",
47+
/* -- */
48+
"[a-zA-Z_][a-zA-Z0-9_]*"
49+
"|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+"
50+
"|[-+*/<>%&^|=!.]=|--|\\+\\+|<<=?|>>=?|===|&&|\\|\\||::|->"
51+
"|[^[:space:]]|[\x80-\xff]+"),
52+
PATTERNS("python", "^[ \t]*((class|def)[ \t].*)$",
53+
/* -- */
54+
"[a-zA-Z_][a-zA-Z0-9_]*"
55+
"|[-+0-9.e]+[jJlL]?|0[xX]?[0-9a-fA-F]+[lL]?"
56+
"|[-+*/<>%&^|=!]=|//=?|<<=?|>>=?|\\*\\*=?"
57+
"|[^[:space:]]|[\x80-\xff]+"), /* fallback: any single non-space char, or a run of high (UTF-8) bytes kept as one chunk */
58+
/* -- */
59+
PATTERNS("ruby", "^[ \t]*((class|module|def)[ \t].*)$",
60+
/* -- */
61+
"(@|@@|\\$)?[a-zA-Z_][a-zA-Z0-9_]*"
62+
"|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?."
63+
"|//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~"
64+
"|[^[:space:]]|[\x80-\xff]+"), /* fallback: any single non-space char, or a run of high (UTF-8) bytes kept as one chunk */
65+
PATTERNS("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
66+
"[={}\"]|[^={}\" \t]+"),
67+
PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$",
68+
"\\\\[a-zA-Z@]+|\\\\.|[a-zA-Z0-9\x80-\xff]+|[^[:space:]]"),
69+
PATTERNS("cpp",
70+
/* Jump targets or access declarations */
71+
"!^[ \t]*[A-Za-z_][A-Za-z_0-9]*:.*$\n"
72+
/* C/++ functions/methods at top level */
73+
"^([A-Za-z_][A-Za-z_0-9]*([ \t]+[A-Za-z_][A-Za-z_0-9]*([ \t]*::[ \t]*[^[:space:]]+)?){1,}[ \t]*\\([^;]*)$\n"
74+
/* compound type at top level */
75+
"^((struct|class|enum)[^;]*)$",
76+
/* -- */
77+
"[a-zA-Z_][a-zA-Z0-9_]*"
78+
"|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
79+
"|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->"
80+
"|[^[:space:]]|[\x80-\xff]+"),
3581
{ "default", NULL, -1, { NULL, 0 } },
3682
};
37-
#undef FUNCNAME
83+
#undef PATTERNS
3884

3985
static struct userdiff_driver driver_true = {
4086
"diff=true",
@@ -134,6 +180,8 @@ int userdiff_config(const char *k, const char *v)
134180
return parse_string(&drv->external, k, v);
135181
if ((drv = parse_driver(k, v, "textconv")))
136182
return parse_string(&drv->textconv, k, v);
183+
if ((drv = parse_driver(k, v, "wordregex")))
184+
return parse_string(&drv->word_regex, k, v);
137185

138186
return 0;
139187
}

userdiff.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ struct userdiff_driver {
1111
const char *external;
1212
int binary;
1313
struct userdiff_funcname funcname;
14+
const char *word_regex;
1415
const char *textconv;
1516
};
1617

0 commit comments

Comments
 (0)