9494# (relative to the current git repository)
9595our $mimetypes_file = undef ;
9696
97+ # assume this charset if line contains non-UTF-8 characters;
98+ # it should be a valid encoding (see Encode::Supported(3pm) for the list),
99+ # for which encoding all byte sequences are valid, for example
100+ # 'iso-8859-1' aka 'latin1' (it is decoded without checking, so it
101+ # could be even 'utf-8' for the old behavior)
102+ our $fallback_encoding = ' latin1' ;
103+
97104# You define site-wide feature defaults here; override them with
98105# $GITWEB_CONFIG as necessary.
99106our %feature = (
@@ -602,6 +609,20 @@ sub validate_refname {
602609 return $input ;
603610}
604611
# Decode a string of octets into Perl's internal character form.
#
# The input is first treated as UTF-8; if it is not well-formed UTF-8
# it is decoded using $fallback_encoding instead.  gitweb writes its
# output as UTF-8 thanks to "binmode STDOUT, ':utf8'" at the beginning.
#
# Takes one string argument.  Returns the decoded string, or undef when
# the argument is undef.  A string that is already decoded (utf8 flag
# set) is returned unchanged so that it is never decoded twice.
sub to_utf8 {
	my $str = shift;

	# Explicit undef (not a bare return): callers use the result inside
	# hash constructors, where an empty list would shift the key/value pairs.
	return undef unless defined $str;

	# utf8::decode() converts $str in place and returns true only when the
	# octets are well-formed UTF-8; otherwise $str is left as it was.
	# decode_utf8($str, Encode::FB_CROAK) is avoided here because, without
	# the LEAVE_SRC bit, Encode may overwrite its source argument, which
	# would leave nothing reliable for the fallback decoding below.
	if (utf8::is_utf8($str) || utf8::decode($str)) {
		return $str;
	}

	# Not valid UTF-8: interpret the original octets in the fallback charset.
	return decode($fallback_encoding, $str, Encode::FB_DEFAULT);
}
625+
605626# quote unsafe chars, but keep the slash, even when it's not
606627# correct, but quoted slashes look too horrible in bookmarks
607628sub esc_param {
@@ -626,7 +647,7 @@ ($;%)
626647 my $str = shift ;
627648 my %opts = @_ ;
628649
629- $str = decode_utf8 ($str );
650+ $str = to_utf8 ($str );
630651 $str = $cgi -> escapeHTML($str );
631652 if ($opts {' -nbsp' }) {
632653 $str =~ s / / / g ;
@@ -640,7 +661,7 @@ sub esc_path {
640661 my $str = shift ;
641662 my %opts = @_ ;
642663
643- $str = decode_utf8 ($str );
664+ $str = to_utf8 ($str );
644665 $str = $cgi -> escapeHTML($str );
645666 if ($opts {' -nbsp' }) {
646667 $str =~ s / / / g ;
@@ -925,7 +946,7 @@ sub format_subject_html {
925946
926947 if (length ($short ) < length ($long )) {
927948 return $cgi -> a({-href => $href , -class => " list subject" ,
928- -title => decode_utf8 ($long )},
949+ -title => to_utf8 ($long )},
929950 esc_html($short ) . $extra );
930951 } else {
931952 return $cgi -> a({-href => $href , -class => " list subject" },
@@ -1239,7 +1260,7 @@ sub git_get_projects_list {
12391260 if (check_export_ok(" $projectroot /$path " )) {
12401261 my $pr = {
12411262 path => $path ,
1242- owner => decode_utf8 ($owner ),
1263+ owner => to_utf8 ($owner ),
12431264 };
12441265 push @list , $pr ;
12451266 (my $forks_path = $path ) =~ s /\. git$// ;
@@ -1269,7 +1290,7 @@ sub git_get_project_owner {
12691290 $pr = unescape($pr );
12701291 $ow = unescape($ow );
12711292 if ($pr eq $project ) {
1272- $owner = decode_utf8 ($ow );
1293+ $owner = to_utf8 ($ow );
12731294 last ;
12741295 }
12751296 }
@@ -1759,7 +1780,7 @@ sub get_file_owner {
17591780 }
17601781 my $owner = $gcos ;
17611782 $owner =~ s / [,;].*$// ;
1762- return decode_utf8 ($owner );
1783+ return to_utf8 ($owner );
17631784}
17641785
17651786# # ......................................................................
@@ -1842,7 +1863,7 @@ sub git_header_html {
18421863
18431864 my $title = " $site_name " ;
18441865 if (defined $project ) {
1845- $title .= " - " . decode_utf8 ($project );
1866+ $title .= " - " . to_utf8 ($project );
18461867 if (defined $action ) {
18471868 $title .= " /$action " ;
18481869 if (defined $file_name ) {
@@ -2116,7 +2137,7 @@ sub git_print_page_path {
21162137
21172138 print " <div class=\" page_path\" >" ;
21182139 print $cgi -> a({-href => href(action => " tree" , hash_base => $hb ),
2119- -title => ' tree root' }, decode_utf8 (" [$project ]" ));
2140+ -title => ' tree root' }, to_utf8 (" [$project ]" ));
21202141 print " / " ;
21212142 if (defined $name ) {
21222143 my @dirname = split ' /' , $name ;
@@ -2936,7 +2957,7 @@ sub git_project_list_body {
29362957 ($pr -> {' age' }, $pr -> {' age_string' }) = @aa ;
29372958 if (!defined $pr -> {' descr' }) {
29382959 my $descr = git_get_project_description($pr -> {' path' }) || " " ;
2939- $pr -> {' descr_long' } = decode_utf8 ($descr );
2960+ $pr -> {' descr_long' } = to_utf8 ($descr );
29402961 $pr -> {' descr' } = chop_str($descr , 25, 5);
29412962 }
29422963 if (!defined $pr -> {' owner' }) {
@@ -3981,7 +4002,7 @@ sub git_snapshot {
39814002 my $git = git_cmd_str();
39824003 my $name = $project ;
39834004 $name =~ s /\047 / \047\\\047\047 / g ;
3984- my $filename = decode_utf8 (basename($project ));
4005+ my $filename = to_utf8 (basename($project ));
39854006 my $cmd ;
39864007 if ($suffix eq ' zip' ) {
39874008 $filename .= " -$hash .$suffix " ;
0 commit comments