Skip to content

Commit eff7375

Browse files
author
Junio C Hamano
committed
Merge branch 'jc/utf8'
* jc/utf8:
  t3900: test conversion to non UTF-8 as well
  Rename t3900 test vector file
  UTF-8: introduce i18n.logoutputencoding.
  Teach log family --encoding
  i18n.logToUTF8: convert commit log message to UTF-8
  Move encoding conversion routine out of mailinfo to utf8.c

Conflicts:
  commit.c
2 parents 013672b + 7255ff0 commit eff7375

File tree

18 files changed

+308
-43
lines changed

18 files changed

+308
-43
lines changed

Documentation/config.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,10 @@ i18n.commitEncoding::
267267
browser (and possibly at other places in the future or in other
268268
porcelains). See e.g. gitlink:git-mailinfo[1]. Defaults to 'utf-8'.
269269

270+
i18n.logOutputEncoding::
271+
Character encoding the commit messages are converted to when
272+
running `git-log` and friends.
273+
270274
log.showroot::
271275
If true, the initial commit will be shown as a big creation event.
272276
This is equivalent to a diff against an empty tree.

builtin-commit-tree.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix)
9292
char comment[1000];
9393
char *buffer;
9494
unsigned int size;
95+
int encoding_is_utf8;
9596

9697
setup_ident();
9798
git_config(git_default_config);
@@ -117,6 +118,10 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix)
117118
parents++;
118119
}
119120

121+
/* Not having i18n.commitencoding is the same as having utf-8 */
122+
encoding_is_utf8 = (!git_commit_encoding ||
123+
!strcmp(git_commit_encoding, "utf-8"));
124+
120125
init_buffer(&buffer, &size);
121126
add_buffer(&buffer, &size, "tree %s\n", sha1_to_hex(tree_sha1));
122127

@@ -130,15 +135,19 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix)
130135

131136
/* Person/date information */
132137
add_buffer(&buffer, &size, "author %s\n", git_author_info(1));
133-
add_buffer(&buffer, &size, "committer %s\n\n", git_committer_info(1));
138+
add_buffer(&buffer, &size, "committer %s\n", git_committer_info(1));
139+
if (!encoding_is_utf8)
140+
add_buffer(&buffer, &size,
141+
"encoding %s\n", git_commit_encoding);
142+
add_buffer(&buffer, &size, "\n");
134143

135144
/* And add the comment */
136145
while (fgets(comment, sizeof(comment), stdin) != NULL)
137146
add_buffer(&buffer, &size, "%s", comment);
138147

139148
/* And check the encoding */
140149
buffer[size] = '\0';
141-
if (!strcmp(git_commit_encoding, "utf-8") && !is_utf8(buffer))
150+
if (encoding_is_utf8 && !is_utf8(buffer))
142151
fprintf(stderr, commit_utf8_warn);
143152

144153
if (!write_sha1_file(buffer, size, commit_type, commit_sha1)) {

builtin-log.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,27 @@ void add_head(struct rev_info *revs);
2020
static void cmd_log_init(int argc, const char **argv, const char *prefix,
2121
struct rev_info *rev)
2222
{
23+
int i;
24+
2325
rev->abbrev = DEFAULT_ABBREV;
2426
rev->commit_format = CMIT_FMT_DEFAULT;
2527
rev->verbose_header = 1;
2628
rev->show_root_diff = default_show_root;
2729
argc = setup_revisions(argc, argv, rev, "HEAD");
2830
if (rev->diffopt.pickaxe || rev->diffopt.filter)
2931
rev->always_show_header = 0;
30-
if (argc > 1)
31-
die("unrecognized argument: %s", argv[1]);
32+
for (i = 1; i < argc; i++) {
33+
const char *arg = argv[i];
34+
if (!strncmp(arg, "--encoding=", 11)) {
35+
arg += 11;
36+
if (strcmp(arg, "none"))
37+
git_log_output_encoding = strdup(arg);
38+
else
39+
git_log_output_encoding = "";
40+
}
41+
else
42+
die("unrecognized argument: %s", arg);
43+
}
3244
}
3345

3446
static int cmd_log_walk(struct rev_info *rev)

builtin-mailinfo.c

Lines changed: 11 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
*/
55
#include "cache.h"
66
#include "builtin.h"
7+
#include "utf8.h"
78

89
static FILE *cmitmsg, *patchfile, *fin, *fout;
910

@@ -510,40 +511,18 @@ static int decode_b_segment(char *in, char *ot, char *ep)
510511

511512
static void convert_to_utf8(char *line, char *charset)
512513
{
513-
#ifndef NO_ICONV
514-
char *in, *out;
515-
size_t insize, outsize, nrc;
516-
char outbuf[4096]; /* cheat */
517514
static char latin_one[] = "latin1";
518515
char *input_charset = *charset ? charset : latin_one;
519-
iconv_t conv = iconv_open(metainfo_charset, input_charset);
520-
521-
if (conv == (iconv_t) -1) {
522-
static int warned_latin1_once = 0;
523-
if (input_charset != latin_one) {
524-
fprintf(stderr, "cannot convert from %s to %s\n",
525-
input_charset, metainfo_charset);
526-
*charset = 0;
527-
}
528-
else if (!warned_latin1_once) {
529-
warned_latin1_once = 1;
530-
fprintf(stderr, "tried to convert from %s to %s, "
531-
"but your iconv does not work with it.\n",
532-
input_charset, metainfo_charset);
533-
}
516+
char *out = reencode_string(line, metainfo_charset, input_charset);
517+
518+
if (!out) {
519+
fprintf(stderr, "cannot convert from %s to %s\n",
520+
input_charset, metainfo_charset);
521+
*charset = 0;
534522
return;
535523
}
536-
in = line;
537-
insize = strlen(in);
538-
out = outbuf;
539-
outsize = sizeof(outbuf);
540-
nrc = iconv(conv, &in, &insize, &out, &outsize);
541-
iconv_close(conv);
542-
if (nrc == (size_t) -1)
543-
return;
544-
*out = 0;
545-
strcpy(line, outbuf);
546-
#endif
524+
strcpy(line, out);
525+
free(out);
547526
}
548527

549528
static int decode_header_bq(char *it)
@@ -827,7 +806,8 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
827806
if (!strcmp(argv[1], "-k"))
828807
keep_subject = 1;
829808
else if (!strcmp(argv[1], "-u"))
830-
metainfo_charset = git_commit_encoding;
809+
metainfo_charset = (git_commit_encoding
810+
? git_commit_encoding : "utf-8");
831811
else if (!strncmp(argv[1], "--encoding=", 11))
832812
metainfo_charset = argv[1] + 11;
833813
else

cache.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -416,8 +416,8 @@ extern int check_repository_format_version(const char *var, const char *value);
416416
extern char git_default_email[MAX_GITNAME];
417417
extern char git_default_name[MAX_GITNAME];
418418

419-
#define MAX_ENCODING_LENGTH 64
420-
extern char git_commit_encoding[MAX_ENCODING_LENGTH];
419+
extern char *git_commit_encoding;
420+
extern char *git_log_output_encoding;
421421

422422
extern int copy_fd(int ifd, int ofd);
423423
extern void write_or_die(int fd, const void *buf, size_t count);

commit.c

Lines changed: 62 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include "tag.h"
33
#include "commit.h"
44
#include "pkt-line.h"
5+
#include "utf8.h"
56

67
int save_commit_buffer = 1;
78

@@ -597,17 +598,72 @@ static int add_merge_info(enum cmit_fmt fmt, char *buf, const struct commit *com
597598
return offset;
598599
}
599600

600-
unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit,
601-
unsigned long len, char *buf, unsigned long space,
601+
static char *get_header(const struct commit *commit, const char *key)
602+
{
603+
int key_len = strlen(key);
604+
const char *line = commit->buffer;
605+
606+
for (;;) {
607+
const char *eol = strchr(line, '\n'), *next;
608+
609+
if (line == eol)
610+
return NULL;
611+
if (!eol) {
612+
eol = line + strlen(line);
613+
next = NULL;
614+
} else
615+
next = eol + 1;
616+
if (!strncmp(line, key, key_len) && line[key_len] == ' ') {
617+
int len = eol - line - key_len;
618+
char *ret = xmalloc(len);
619+
memcpy(ret, line + key_len + 1, len - 1);
620+
ret[len - 1] = '\0';
621+
return ret;
622+
}
623+
line = next;
624+
}
625+
}
626+
627+
static char *logmsg_reencode(const struct commit *commit)
628+
{
629+
char *encoding;
630+
char *out;
631+
char *output_encoding = (git_log_output_encoding
632+
? git_log_output_encoding
633+
: git_commit_encoding);
634+
635+
if (!output_encoding)
636+
return NULL;
637+
encoding = get_header(commit, "encoding");
638+
if (!encoding || !strcmp(encoding, output_encoding)) {
639+
free(encoding);
640+
return NULL;
641+
}
642+
out = reencode_string(commit->buffer, output_encoding, encoding);
643+
free(encoding);
644+
if (!out)
645+
return NULL;
646+
return out;
647+
}
648+
649+
unsigned long pretty_print_commit(enum cmit_fmt fmt,
650+
const struct commit *commit,
651+
unsigned long len,
652+
char *buf, unsigned long space,
602653
int abbrev, const char *subject,
603-
const char *after_subject, int relative_date)
654+
const char *after_subject,
655+
int relative_date)
604656
{
605657
int hdr = 1, body = 0;
606658
unsigned long offset = 0;
607659
int indent = 4;
608660
int parents_shown = 0;
609661
const char *msg = commit->buffer;
610662
int plain_non_ascii = 0;
663+
char *reencoded = logmsg_reencode(commit);
664+
665+
if (reencoded)
666+
msg = reencoded;
611667

612668
if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL)
613669
indent = 0;
@@ -624,7 +680,7 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit
624680
for (in_body = i = 0; (ch = msg[i]) && i < len; i++) {
625681
if (!in_body) {
626682
/* author could be non 7-bit ASCII but
627-
* the log may so; skip over the
683+
* the log may be so; skip over the
628684
* header part first.
629685
*/
630686
if (ch == '\n' &&
@@ -755,6 +811,8 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit
755811
if (fmt == CMIT_FMT_EMAIL && !body)
756812
buf[offset++] = '\n';
757813
buf[offset] = '\0';
814+
815+
free(reencoded);
758816
return offset;
759817
}
760818

config.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,10 +309,16 @@ int git_default_config(const char *var, const char *value)
309309
}
310310

311311
if (!strcmp(var, "i18n.commitencoding")) {
312-
strlcpy(git_commit_encoding, value, sizeof(git_commit_encoding));
312+
git_commit_encoding = strdup(value);
313313
return 0;
314314
}
315315

316+
if (!strcmp(var, "i18n.logoutputencoding")) {
317+
git_log_output_encoding = strdup(value);
318+
return 0;
319+
}
320+
321+
316322
if (!strcmp(var, "pager.color") || !strcmp(var, "color.pager")) {
317323
pager_use_color = git_config_bool(var,value);
318324
return 0;

contrib/completion/git-completion.bash

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -711,6 +711,7 @@ _git_repo_config ()
711711
core.compression
712712
core.legacyHeaders
713713
i18n.commitEncoding
714+
i18n.logOutputEncoding
714715
diff.color
715716
color.diff
716717
diff.renameLimit

environment.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ int prefer_symlink_refs;
1818
int log_all_ref_updates;
1919
int warn_ambiguous_refs = 1;
2020
int repository_format_version;
21-
char git_commit_encoding[MAX_ENCODING_LENGTH] = "utf-8";
21+
char *git_commit_encoding;
22+
char *git_log_output_encoding;
2223
int shared_repository = PERM_UMASK;
2324
const char *apply_default_whitespace;
2425
int zlib_compression_level = Z_DEFAULT_COMPRESSION;

revision.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ struct rev_info {
7272
const char *ref_message_id;
7373
const char *add_signoff;
7474
const char *extra_headers;
75+
const char *log_reencode;
7576

7677
/* Filter by commit log message */
7778
struct grep_opt *grep_filter;

0 commit comments

Comments
 (0)