Skip to content

Commit 0cec6db

Browse files
jnarebgitster
authored andcommitted
gitweb: Fix and simplify "split patch" detection
There are some cases when one line from "raw" git-diff output (raw format) corresponds to more than one patch in the patchset git-diff output; we call this situation "split patch". Old code misdetected subsequent patches (for different files) with the same pre-image and post-image as fragments of "split patch", leading to mislabeled from-file/to-file diff header etc. Old code used pre-image and post-image SHA-1 identifier ('from_id' and 'to_id') to check if current patch corresponds to old raw diff format line, to find if one difftree raw line coresponds to more than one patch in the patch format. Now we use post-image filename for that. This assumes that post-image filename alone can be used to identify difftree raw line. In the case this changes (which is unlikely considering current diff engine) we can add 'from_id' and 'to_id' to detect "patch splitting" together with 'to_file'. Because old code got pre-image and post-image SHA-1 identifier for the patch from the "index" line in extended diff header, diff header had to be buffered. New code takes post-image filename from "git diff" header, which is first line of a patch; this allows to simplify git_patchset_body code. A side effect of resigning diff header buffering is that there is always "diff extended_header" div, even if extended diff header is empty. Alternate solution would be to check when git splits patches, and do not check if parsed info from current patch corresponds to current or next raw diff format output line. Git splits patches only for 'T' (typechange) status filepair, and there always two patches corresponding to one raw diff line. It was not used because it would tie gitweb code to minute details of git diff output. While at it, use newly introduced parsed_difftree_line wrapper subroutine in git_difftree_body. Noticed-by: Yann Dirson <ydirson@altern.org> Diagnosed-by: Petr Baudis <pasky@suse.cz> Signed-off-by: Jakub Narebski <jnareb@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 09149c7 commit 0cec6db

File tree

1 file changed

+67
-85
lines changed

1 file changed

+67
-85
lines changed

gitweb/gitweb.perl

Lines changed: 67 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -2000,6 +2000,19 @@ sub parse_difftree_raw_line {
20002000
return wantarray ? %res : \%res;
20012001
}
20022002

2003+
# wrapper: return parsed line of git-diff-tree "raw" output
2004+
# (the argument might be raw line, or parsed info)
2005+
sub parsed_difftree_line {
2006+
my $line_or_ref = shift;
2007+
2008+
if (ref($line_or_ref) eq "HASH") {
2009+
# pre-parsed (or generated by hand)
2010+
return $line_or_ref;
2011+
} else {
2012+
return parse_difftree_raw_line($line_or_ref);
2013+
}
2014+
}
2015+
20032016
# parse line of git-ls-tree output
20042017
sub parse_ls_tree_line ($;%) {
20052018
my $line = shift;
@@ -2043,6 +2056,7 @@ sub parse_from_to_diffinfo {
20432056
}
20442057
}
20452058
} else {
2059+
# ordinary (not combined) diff
20462060
$from->{'file'} = $diffinfo->{'from_file'} || $diffinfo->{'file'};
20472061
if ($diffinfo->{'status'} ne "A") { # not new (added) file
20482062
$from->{'href'} = href(action=>"blob", hash_base=>$hash_parent,
@@ -2766,6 +2780,7 @@ sub git_print_tree_entry {
27662780
## ......................................................................
27672781
## functions printing large fragments of HTML
27682782

2783+
# get pre-image filenames for merge (combined) diff
27692784
sub fill_from_file_info {
27702785
my ($diff, @parents) = @_;
27712786

@@ -2782,28 +2797,25 @@ sub fill_from_file_info {
27822797
return $diff;
27832798
}
27842799

2785-
# parameters can be strings, or references to arrays of strings
2786-
sub from_ids_eq {
2787-
my ($a, $b) = @_;
2788-
2789-
if (ref($a) eq "ARRAY" && ref($b) eq "ARRAY" && @$a == @$b) {
2790-
for (my $i = 0; $i < @$a; ++$i) {
2791-
return 0 unless ($a->[$i] eq $b->[$i]);
2792-
}
2793-
return 1;
2794-
} elsif (!ref($a) && !ref($b)) {
2795-
return $a eq $b;
2796-
} else {
2797-
return 0;
2798-
}
2799-
}
2800-
2800+
# is current raw difftree line of file deletion
28012801
sub is_deleted {
28022802
my $diffinfo = shift;
28032803

28042804
return $diffinfo->{'to_id'} eq ('0' x 40);
28052805
}
28062806

2807+
# does patch correspond to [previous] difftree raw line
2808+
# $diffinfo - hashref of parsed raw diff format
2809+
# $patchinfo - hashref of parsed patch diff format
2810+
# (the same keys as in $diffinfo)
2811+
sub is_patch_split {
2812+
my ($diffinfo, $patchinfo) = @_;
2813+
2814+
return defined $diffinfo && defined $patchinfo
2815+
&& ($diffinfo->{'to_file'} || $diffinfo->{'file'}) eq $patchinfo->{'to_file'};
2816+
}
2817+
2818+
28072819
sub git_difftree_body {
28082820
my ($difftree, $hash, @parents) = @_;
28092821
my ($parent) = $parents[0];
@@ -2840,13 +2852,7 @@ sub git_difftree_body {
28402852
my $alternate = 1;
28412853
my $patchno = 0;
28422854
foreach my $line (@{$difftree}) {
2843-
my $diff;
2844-
if (ref($line) eq "HASH") {
2845-
# pre-parsed (or generated by hand)
2846-
$diff = $line;
2847-
} else {
2848-
$diff = parse_difftree_raw_line($line);
2849-
}
2855+
my $diff = parsed_difftree_line($line);
28502856

28512857
if ($alternate) {
28522858
print "<tr class=\"dark\">\n";
@@ -3117,10 +3123,12 @@ sub git_patchset_body {
31173123
my ($fd, $difftree, $hash, @hash_parents) = @_;
31183124
my ($hash_parent) = $hash_parents[0];
31193125

3126+
my $is_combined = (@hash_parents > 1);
31203127
my $patch_idx = 0;
31213128
my $patch_number = 0;
31223129
my $patch_line;
31233130
my $diffinfo;
3131+
my $to_name;
31243132
my (%from, %to);
31253133

31263134
print "<div class=\"patchset\">\n";
@@ -3134,73 +3142,46 @@ sub git_patchset_body {
31343142

31353143
PATCH:
31363144
while ($patch_line) {
3137-
my @diff_header;
3138-
my ($from_id, $to_id);
3139-
3140-
# git diff header
3141-
#assert($patch_line =~ m/^diff /) if DEBUG;
3142-
#assert($patch_line !~ m!$/$!) if DEBUG; # is chomp-ed
3143-
$patch_number++;
3144-
push @diff_header, $patch_line;
3145-
3146-
# extended diff header
3147-
EXTENDED_HEADER:
3148-
while ($patch_line = <$fd>) {
3149-
chomp $patch_line;
31503145

3151-
last EXTENDED_HEADER if ($patch_line =~ m/^--- |^diff /);
3152-
3153-
if ($patch_line =~ m/^index ([0-9a-fA-F]{40})..([0-9a-fA-F]{40})/) {
3154-
$from_id = $1;
3155-
$to_id = $2;
3156-
} elsif ($patch_line =~ m/^index ((?:[0-9a-fA-F]{40},)+[0-9a-fA-F]{40})..([0-9a-fA-F]{40})/) {
3157-
$from_id = [ split(',', $1) ];
3158-
$to_id = $2;
3159-
}
3160-
3161-
push @diff_header, $patch_line;
3146+
# parse "git diff" header line
3147+
if ($patch_line =~ m/^diff --git (\"(?:[^\\\"]*(?:\\.[^\\\"]*)*)\"|[^ "]*) (.*)$/) {
3148+
# $1 is from_name, which we do not use
3149+
$to_name = unquote($2);
3150+
$to_name =~ s!^b/!!;
3151+
} elsif ($patch_line =~ m/^diff --(cc|combined) ("?.*"?)$/) {
3152+
# $1 is 'cc' or 'combined', which we do not use
3153+
$to_name = unquote($2);
3154+
} else {
3155+
$to_name = undef;
31623156
}
3163-
my $last_patch_line = $patch_line;
31643157

31653158
# check if current patch belong to current raw line
31663159
# and parse raw git-diff line if needed
3167-
if (defined $diffinfo &&
3168-
defined $from_id && defined $to_id &&
3169-
from_ids_eq($diffinfo->{'from_id'}, $from_id) &&
3170-
$diffinfo->{'to_id'} eq $to_id) {
3160+
if (is_patch_split($diffinfo, { 'to_file' => $to_name })) {
31713161
# this is continuation of a split patch
31723162
print "<div class=\"patch cont\">\n";
31733163
} else {
31743164
# advance raw git-diff output if needed
31753165
$patch_idx++ if defined $diffinfo;
31763166

3177-
# compact combined diff output can have some patches skipped
3178-
# find which patch (using pathname of result) we are at now
3179-
my $to_name;
3180-
if ($diff_header[0] =~ m!^diff --cc "?(.*)"?$!) {
3181-
$to_name = $1;
3182-
}
3183-
3184-
do {
3185-
# read and prepare patch information
3186-
if (ref($difftree->[$patch_idx]) eq "HASH") {
3187-
# pre-parsed (or generated by hand)
3188-
$diffinfo = $difftree->[$patch_idx];
3189-
} else {
3190-
$diffinfo = parse_difftree_raw_line($difftree->[$patch_idx]);
3191-
}
3167+
# read and prepare patch information
3168+
$diffinfo = parsed_difftree_line($difftree->[$patch_idx]);
31923169

3193-
# check if current raw line has no patch (it got simplified)
3194-
if (defined $to_name && $to_name ne $diffinfo->{'to_file'}) {
3170+
# compact combined diff output can have some patches skipped
3171+
# find which patch (using pathname of result) we are at now;
3172+
if ($is_combined) {
3173+
while ($to_name ne $diffinfo->{'to_file'}) {
31953174
print "<div class=\"patch\" id=\"patch". ($patch_idx+1) ."\">\n" .
31963175
format_diff_cc_simplified($diffinfo, @hash_parents) .
31973176
"</div>\n"; # class="patch"
31983177

31993178
$patch_idx++;
32003179
$patch_number++;
3180+
3181+
last if $patch_idx > $#$difftree;
3182+
$diffinfo = parsed_difftree_line($difftree->[$patch_idx]);
32013183
}
3202-
} until (!defined $to_name || $to_name eq $diffinfo->{'to_file'} ||
3203-
$patch_idx > $#$difftree);
3184+
}
32043185

32053186
# modifies %from, %to hashes
32063187
parse_from_to_diffinfo($diffinfo, \%from, \%to, @hash_parents);
@@ -3210,30 +3191,36 @@ sub git_patchset_body {
32103191
print "<div class=\"patch\" id=\"patch". ($patch_idx+1) ."\">\n";
32113192
}
32123193

3194+
# git diff header
3195+
#assert($patch_line =~ m/^diff /) if DEBUG;
3196+
#assert($patch_line !~ m!$/$!) if DEBUG; # is chomp-ed
3197+
$patch_number++;
32133198
# print "git diff" header
3214-
$patch_line = shift @diff_header;
32153199
print format_git_diff_header_line($patch_line, $diffinfo,
32163200
\%from, \%to);
32173201

32183202
# print extended diff header
3219-
print "<div class=\"diff extended_header\">\n" if (@diff_header > 0);
3203+
print "<div class=\"diff extended_header\">\n";
32203204
EXTENDED_HEADER:
3221-
foreach $patch_line (@diff_header) {
3205+
while ($patch_line = <$fd>) {
3206+
chomp $patch_line;
3207+
3208+
last EXTENDED_HEADER if ($patch_line =~ m/^--- |^diff /);
3209+
32223210
print format_extended_diff_header_line($patch_line, $diffinfo,
32233211
\%from, \%to);
32243212
}
3225-
print "</div>\n" if (@diff_header > 0); # class="diff extended_header"
3213+
print "</div>\n"; # class="diff extended_header"
32263214

32273215
# from-file/to-file diff header
3228-
$patch_line = $last_patch_line;
32293216
if (! $patch_line) {
32303217
print "</div>\n"; # class="patch"
32313218
last PATCH;
32323219
}
32333220
next PATCH if ($patch_line =~ m/^diff /);
32343221
#assert($patch_line =~ m/^---/) if DEBUG;
3235-
#assert($patch_line eq $last_patch_line) if DEBUG;
32363222

3223+
my $last_patch_line = $patch_line;
32373224
$patch_line = <$fd>;
32383225
chomp $patch_line;
32393226
#assert($patch_line =~ m/^\+\+\+/) if DEBUG;
@@ -3258,16 +3245,11 @@ sub git_patchset_body {
32583245

32593246
# for compact combined (--cc) format, with chunk and patch simpliciaction
32603247
# patchset might be empty, but there might be unprocessed raw lines
3261-
for ($patch_idx++ if $patch_number > 0;
3248+
for (++$patch_idx if $patch_number > 0;
32623249
$patch_idx < @$difftree;
3263-
$patch_idx++) {
3250+
++$patch_idx) {
32643251
# read and prepare patch information
3265-
if (ref($difftree->[$patch_idx]) eq "HASH") {
3266-
# pre-parsed (or generated by hand)
3267-
$diffinfo = $difftree->[$patch_idx];
3268-
} else {
3269-
$diffinfo = parse_difftree_raw_line($difftree->[$patch_idx]);
3270-
}
3252+
$diffinfo = parsed_difftree_line($difftree->[$patch_idx]);
32713253

32723254
# generate anchor for "patch" links in difftree / whatchanged part
32733255
print "<div class=\"patch\" id=\"patch". ($patch_idx+1) ."\">\n" .

0 commit comments

Comments
 (0)