77#include "sigchain.h"
88#include "pkt-line.h"
99#include "sub-process.h"
10+ #include "utf8.h"
1011
1112/*
1213 * convert.c - convert a file when checking it out and checking it in.
@@ -265,6 +266,78 @@ static int will_convert_lf_to_crlf(size_t len, struct text_stat *stats,
265266
266267}
267268
/*
 * Encoding used on the Git side of a working-tree-encoding conversion:
 * encode_to_git() converts to it, encode_to_worktree() converts from it.
 */
static const char *default_encoding = "UTF-8";
270+
271+ static int encode_to_git (const char * path , const char * src , size_t src_len ,
272+ struct strbuf * buf , const char * enc , int conv_flags )
273+ {
274+ char * dst ;
275+ int dst_len ;
276+ int die_on_error = conv_flags & CONV_WRITE_OBJECT ;
277+
278+ /*
279+ * No encoding is specified or there is nothing to encode.
280+ * Tell the caller that the content was not modified.
281+ */
282+ if (!enc || (src && !src_len ))
283+ return 0 ;
284+
285+ /*
286+ * Looks like we got called from "would_convert_to_git()".
287+ * This means Git wants to know if it would encode (= modify!)
288+ * the content. Let's answer with "yes", since an encoding was
289+ * specified.
290+ */
291+ if (!buf && !src )
292+ return 1 ;
293+
294+ dst = reencode_string_len (src , src_len , default_encoding , enc ,
295+ & dst_len );
296+ if (!dst ) {
297+ /*
298+ * We could add the blob "as-is" to Git. However, on checkout
299+ * we would try to reencode to the original encoding. This
300+ * would fail and we would leave the user with a messed-up
301+ * working tree. Let's try to avoid this by screaming loud.
302+ */
303+ const char * msg = _ ("failed to encode '%s' from %s to %s" );
304+ if (die_on_error )
305+ die (msg , path , enc , default_encoding );
306+ else {
307+ error (msg , path , enc , default_encoding );
308+ return 0 ;
309+ }
310+ }
311+
312+ strbuf_attach (buf , dst , dst_len , dst_len + 1 );
313+ return 1 ;
314+ }
315+
316+ static int encode_to_worktree (const char * path , const char * src , size_t src_len ,
317+ struct strbuf * buf , const char * enc )
318+ {
319+ char * dst ;
320+ int dst_len ;
321+
322+ /*
323+ * No encoding is specified or there is nothing to encode.
324+ * Tell the caller that the content was not modified.
325+ */
326+ if (!enc || (src && !src_len ))
327+ return 0 ;
328+
329+ dst = reencode_string_len (src , src_len , enc , default_encoding ,
330+ & dst_len );
331+ if (!dst ) {
332+ error ("failed to encode '%s' from %s to %s" ,
333+ path , default_encoding , enc );
334+ return 0 ;
335+ }
336+
337+ strbuf_attach (buf , dst , dst_len , dst_len + 1 );
338+ return 1 ;
339+ }
340+
268341static int crlf_to_git (const struct index_state * istate ,
269342 const char * path , const char * src , size_t len ,
270343 struct strbuf * buf ,
@@ -978,6 +1051,24 @@ static int ident_to_worktree(const char *path, const char *src, size_t len,
9781051 return 1 ;
9791052}
9801053
1054+ static const char * git_path_check_encoding (struct attr_check_item * check )
1055+ {
1056+ const char * value = check -> value ;
1057+
1058+ if (ATTR_UNSET (value ) || !strlen (value ))
1059+ return NULL ;
1060+
1061+ if (ATTR_TRUE (value ) || ATTR_FALSE (value )) {
1062+ die (_ ("true/false are no valid working-tree-encodings" ));
1063+ }
1064+
1065+ /* Don't encode to the default encoding */
1066+ if (same_encoding (value , default_encoding ))
1067+ return NULL ;
1068+
1069+ return value ;
1070+ }
1071+
9811072static enum crlf_action git_path_check_crlf (struct attr_check_item * check )
9821073{
9831074 const char * value = check -> value ;
@@ -1033,6 +1124,7 @@ struct conv_attrs {
10331124 enum crlf_action attr_action ; /* What attr says */
10341125 enum crlf_action crlf_action ; /* When no attr is set, use core.autocrlf */
10351126 int ident ;
1127+ const char * working_tree_encoding ; /* Supported encoding or default encoding if NULL */
10361128};
10371129
10381130static void convert_attrs (struct conv_attrs * ca , const char * path )
@@ -1041,7 +1133,8 @@ static void convert_attrs(struct conv_attrs *ca, const char *path)
10411133
10421134 if (!check ) {
10431135 check = attr_check_initl ("crlf" , "ident" , "filter" ,
1044- "eol" , "text" , NULL );
1136+ "eol" , "text" , "working-tree-encoding" ,
1137+ NULL );
10451138 user_convert_tail = & user_convert ;
10461139 git_config (read_convert_config , NULL );
10471140 }
@@ -1064,6 +1157,7 @@ static void convert_attrs(struct conv_attrs *ca, const char *path)
10641157 else if (eol_attr == EOL_CRLF )
10651158 ca -> crlf_action = CRLF_TEXT_CRLF ;
10661159 }
1160+ ca -> working_tree_encoding = git_path_check_encoding (ccheck + 5 );
10671161 } else {
10681162 ca -> drv = NULL ;
10691163 ca -> crlf_action = CRLF_UNDEFINED ;
@@ -1144,6 +1238,13 @@ int convert_to_git(const struct index_state *istate,
11441238 src = dst -> buf ;
11451239 len = dst -> len ;
11461240 }
1241+
1242+ ret |= encode_to_git (path , src , len , dst , ca .working_tree_encoding , conv_flags );
1243+ if (ret && dst ) {
1244+ src = dst -> buf ;
1245+ len = dst -> len ;
1246+ }
1247+
11471248 if (!(conv_flags & CONV_EOL_KEEP_CRLF )) {
11481249 ret |= crlf_to_git (istate , path , src , len , dst , ca .crlf_action , conv_flags );
11491250 if (ret && dst ) {
@@ -1167,6 +1268,7 @@ void convert_to_git_filter_fd(const struct index_state *istate,
11671268 if (!apply_filter (path , NULL , 0 , fd , dst , ca .drv , CAP_CLEAN , NULL ))
11681269 die ("%s: clean filter '%s' failed" , path , ca .drv -> name );
11691270
1271+ encode_to_git (path , dst -> buf , dst -> len , dst , ca .working_tree_encoding , conv_flags );
11701272 crlf_to_git (istate , path , dst -> buf , dst -> len , dst , ca .crlf_action , conv_flags );
11711273 ident_to_git (path , dst -> buf , dst -> len , dst , ca .ident );
11721274}
@@ -1198,6 +1300,12 @@ static int convert_to_working_tree_internal(const char *path, const char *src,
11981300 }
11991301 }
12001302
1303+ ret |= encode_to_worktree (path , src , len , dst , ca .working_tree_encoding );
1304+ if (ret ) {
1305+ src = dst -> buf ;
1306+ len = dst -> len ;
1307+ }
1308+
12011309 ret_filter = apply_filter (
12021310 path , src , len , -1 , dst , ca .drv , CAP_SMUDGE , dco );
12031311 if (!ret_filter && ca .drv && ca .drv -> required )
@@ -1664,6 +1772,9 @@ struct stream_filter *get_stream_filter(const char *path, const unsigned char *s
16641772 if (ca .drv && (ca .drv -> process || ca .drv -> smudge || ca .drv -> clean ))
16651773 return NULL ;
16661774
1775+ if (ca .working_tree_encoding )
1776+ return NULL ;
1777+
16671778 if (ca .crlf_action == CRLF_AUTO || ca .crlf_action == CRLF_AUTO_CRLF )
16681779 return NULL ;
16691780
0 commit comments