Skip to content

Commit 2ac4b4b

Browse files
committed
Merge branch 'sp/safecrlf'
* sp/safecrlf: safecrlf: Add mechanism to warn about irreversible crlf conversions
2 parents 9907326 + 21e5ad5 commit 2ac4b4b

File tree

11 files changed

+189
-11
lines changed

11 files changed

+189
-11
lines changed

Documentation/config.txt

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,51 @@ core.autocrlf::
139139
"text" (i.e. be subjected to the autocrlf mechanism) is
140140
decided purely based on the contents.
141141

142+
core.safecrlf::
143+
If true, makes git check if converting `CRLF` as controlled by
144+
`core.autocrlf` is reversible. Git will verify if a command
145+
modifies a file in the work tree either directly or indirectly.
146+
For example, committing a file followed by checking out the
147+
same file should yield the original file in the work tree. If
148+
this is not the case for the current setting of
149+
`core.autocrlf`, git will reject the file. The variable can
150+
be set to "warn", in which case git will only warn about an
151+
irreversible conversion but continue the operation.
152+
+
153+
CRLF conversion bears a slight chance of corrupting data.
154+
autocrlf=true will convert CRLF to LF during commit and LF to
155+
CRLF during checkout. A file that contains a mixture of LF and
156+
CRLF before the commit cannot be recreated by git. For text
157+
files this is the right thing to do: it corrects line endings
158+
such that we have only LF line endings in the repository.
159+
But for binary files that are accidentally classified as text the
160+
conversion can corrupt data.
161+
+
162+
If you recognize such corruption early you can easily fix it by
163+
setting the conversion type explicitly in .gitattributes. Right
164+
after committing you still have the original file in your work
165+
tree and this file is not yet corrupted. You can explicitly tell
166+
git that this file is binary and git will handle the file
167+
appropriately.
168+
+
169+
Unfortunately, the desired effect of cleaning up text files with
170+
mixed line endings and the undesired effect of corrupting binary
171+
files cannot be distinguished. In both cases CRLFs are removed
172+
in an irreversible way. For text files this is the right thing
173+
to do because CRLFs are line endings, while for binary files
174+
converting CRLFs corrupts data.
175+
+
176+
Note, this safety check does not mean that a checkout will generate a
177+
file identical to the original file for a different setting of
178+
`core.autocrlf`, but only for the current one. For example, a text
179+
file with `LF` would be accepted with `core.autocrlf=input` and could
180+
later be checked out with `core.autocrlf=true`, in which case the
181+
resulting file would contain `CRLF`, although the original file
182+
contained `LF`. However, in both work trees the line endings would be
183+
consistent, that is either all `LF` or all `CRLF`, but never mixed. A
184+
file with mixed line endings would be reported by the `core.safecrlf`
185+
mechanism.
186+
142187
core.symlinks::
143188
If false, symbolic links are checked out as small plain files that
144189
contain the link text. linkgit:git-update-index[1] and

Documentation/gitattributes.txt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,26 @@ When `core.autocrlf` is set to "input", line endings are
133133
converted to LF upon checkin, but there is no conversion done
134134
upon checkout.
135135

136+
If `core.safecrlf` is set to "true" or "warn", git verifies if
137+
the conversion is reversible for the current setting of
138+
`core.autocrlf`. For "true", git rejects irreversible
139+
conversions; for "warn", git only prints a warning but accepts
140+
an irreversible conversion. The safety triggers to prevent such
141+
a conversion done to the files in the work tree, but there are a
142+
few exceptions. Even though...
143+
144+
- "git add" itself does not touch the files in the work tree, the
145+
next checkout would, so the safety triggers;
146+
147+
- "git apply" to update a text file with a patch does touch the files
148+
in the work tree, but the operation is about text files and CRLF
149+
conversion is about fixing the line ending inconsistencies, so the
150+
safety does not trigger;
151+
152+
- "git diff" itself does not touch the files in the work tree, it is
153+
often run to inspect the changes you intend to next "git add". To
154+
catch potential problems early, safety triggers.
155+
136156

137157
`ident`
138158
^^^^^^^

builtin-apply.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1430,7 +1430,7 @@ static int read_old_data(struct stat *st, const char *path, struct strbuf *buf)
14301430
case S_IFREG:
14311431
if (strbuf_read_file(buf, path, st->st_size) != st->st_size)
14321432
return error("unable to open or read %s", path);
1433-
convert_to_git(path, buf->buf, buf->len, buf);
1433+
convert_to_git(path, buf->buf, buf->len, buf, 0);
14341434
return 0;
14351435
default:
14361436
return -1;

builtin-blame.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2073,7 +2073,7 @@ static struct commit *fake_working_tree_commit(const char *path, const char *con
20732073
if (strbuf_read(&buf, 0, 0) < 0)
20742074
die("read error %s from stdin", strerror(errno));
20752075
}
2076-
convert_to_git(path, buf.buf, buf.len, &buf);
2076+
convert_to_git(path, buf.buf, buf.len, &buf, 0);
20772077
origin->file.ptr = buf.buf;
20782078
origin->file.size = buf.len;
20792079
pretend_sha1_file(buf.buf, buf.len, OBJ_BLOB, origin->blob_sha1);

cache.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,14 @@ extern size_t packed_git_limit;
383383
extern size_t delta_base_cache_limit;
384384
extern int auto_crlf;
385385

386+
enum safe_crlf {
387+
SAFE_CRLF_FALSE = 0,
388+
SAFE_CRLF_FAIL = 1,
389+
SAFE_CRLF_WARN = 2,
390+
};
391+
392+
extern enum safe_crlf safe_crlf;
393+
386394
#define GIT_REPO_VERSION 0
387395
extern int repository_format_version;
388396
extern int check_repository_format(void);
@@ -691,7 +699,8 @@ extern void trace_argv_printf(const char **argv, const char *format, ...);
691699

692700
/* convert.c */
693701
/* returns 1 if *dst was used */
694-
extern int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst);
702+
extern int convert_to_git(const char *path, const char *src, size_t len,
703+
struct strbuf *dst, enum safe_crlf checksafe);
695704
extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst);
696705

697706
/* add */

config.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,15 @@ int git_default_config(const char *var, const char *value)
415415
return 0;
416416
}
417417

418+
if (!strcmp(var, "core.safecrlf")) {
419+
if (value && !strcasecmp(value, "warn")) {
420+
safe_crlf = SAFE_CRLF_WARN;
421+
return 0;
422+
}
423+
safe_crlf = git_config_bool(var, value);
424+
return 0;
425+
}
426+
418427
if (!strcmp(var, "user.name")) {
419428
if (!value)
420429
return config_error_nonbool(var);

convert.c

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,39 @@ static int is_binary(unsigned long size, struct text_stat *stats)
8585
return 0;
8686
}
8787

88+
static void check_safe_crlf(const char *path, int action,
89+
struct text_stat *stats, enum safe_crlf checksafe)
90+
{
91+
if (!checksafe)
92+
return;
93+
94+
if (action == CRLF_INPUT || auto_crlf <= 0) {
95+
/*
96+
* CRLFs would not be restored by checkout:
97+
* check if we'd remove CRLFs
98+
*/
99+
if (stats->crlf) {
100+
if (checksafe == SAFE_CRLF_WARN)
101+
warning("CRLF will be replaced by LF in %s.", path);
102+
else /* i.e. SAFE_CRLF_FAIL */
103+
die("CRLF would be replaced by LF in %s.", path);
104+
}
105+
} else if (auto_crlf > 0) {
106+
/*
107+
* CRLFs would be added by checkout:
108+
* check if we have "naked" LFs
109+
*/
110+
if (stats->lf != stats->crlf) {
111+
if (checksafe == SAFE_CRLF_WARN)
112+
warning("LF will be replaced by CRLF in %s", path);
113+
else /* i.e. SAFE_CRLF_FAIL */
114+
die("LF would be replaced by CRLF in %s", path);
115+
}
116+
}
117+
}
118+
88119
static int crlf_to_git(const char *path, const char *src, size_t len,
89-
struct strbuf *buf, int action)
120+
struct strbuf *buf, int action, enum safe_crlf checksafe)
90121
{
91122
struct text_stat stats;
92123
char *dst;
@@ -95,9 +126,6 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
95126
return 0;
96127

97128
gather_stats(src, len, &stats);
98-
/* No CR? Nothing to convert, regardless. */
99-
if (!stats.cr)
100-
return 0;
101129

102130
if (action == CRLF_GUESS) {
103131
/*
@@ -115,6 +143,12 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
115143
return 0;
116144
}
117145

146+
check_safe_crlf(path, action, &stats, checksafe);
147+
148+
/* Optimization: No CR? Nothing to convert, regardless. */
149+
if (!stats.cr)
150+
return 0;
151+
118152
/* only grow if not in place */
119153
if (strbuf_avail(buf) + buf->len < len)
120154
strbuf_grow(buf, len - buf->len);
@@ -536,7 +570,8 @@ static int git_path_check_ident(const char *path, struct git_attr_check *check)
536570
return !!ATTR_TRUE(value);
537571
}
538572

539-
int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst)
573+
int convert_to_git(const char *path, const char *src, size_t len,
574+
struct strbuf *dst, enum safe_crlf checksafe)
540575
{
541576
struct git_attr_check check[3];
542577
int crlf = CRLF_GUESS;
@@ -558,7 +593,7 @@ int convert_to_git(const char *path, const char *src, size_t len, struct strbuf
558593
src = dst->buf;
559594
len = dst->len;
560595
}
561-
ret |= crlf_to_git(path, src, len, dst, crlf);
596+
ret |= crlf_to_git(path, src, len, dst, crlf, checksafe);
562597
if (ret) {
563598
src = dst->buf;
564599
len = dst->len;

diff.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1631,7 +1631,7 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
16311631
* Convert from working tree format to canonical git format
16321632
*/
16331633
strbuf_init(&buf, 0);
1634-
if (convert_to_git(s->path, s->data, s->size, &buf)) {
1634+
if (convert_to_git(s->path, s->data, s->size, &buf, safe_crlf)) {
16351635
size_t size = 0;
16361636
munmap(s->data, s->size);
16371637
s->should_munmap = 0;

environment.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ int pager_use_color = 1;
3535
const char *editor_program;
3636
const char *excludes_file;
3737
int auto_crlf = 0; /* 1: both ways, -1: only when adding git objects */
38+
enum safe_crlf safe_crlf = SAFE_CRLF_WARN;
3839
unsigned whitespace_rule_cfg = WS_DEFAULT_RULE;
3940

4041
/* This is set by setup_git_dir_gently() and/or git_default_config() */

sha1_file.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2358,7 +2358,8 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object,
23582358
if ((type == OBJ_BLOB) && S_ISREG(st->st_mode)) {
23592359
struct strbuf nbuf;
23602360
strbuf_init(&nbuf, 0);
2361-
if (convert_to_git(path, buf, size, &nbuf)) {
2361+
if (convert_to_git(path, buf, size, &nbuf,
2362+
write_object ? safe_crlf : 0)) {
23622363
munmap(buf, size);
23632364
buf = strbuf_detach(&nbuf, &size);
23642365
re_allocated = 1;

0 commit comments

Comments
 (0)