gitweb: Better cutting matched string and its context

jnareb · gitster · commit b8d97d07fd50 · 2008-02-25T22:20:18.000-08:00
Improve look of commit search output ('search' view) by better cutting
of matched string and its context in match info, as suggested by Junio.
For example, if you are looking for "very long search string" in the
following line:

    Could somebody test this with very long search string, and see how

you would now see:

    ...this with &lt;&lt;very long ... string&gt;&gt;, and see...

instead of:

    Could som... &lt;&lt;very long search...&gt;&gt;, and see...

(where &lt;&lt;something&gt;&gt; denotes emphasized / colored fragment; matched
fragment to be more exact).

For this feature, support for fourth [optional] parameter to chop_str
subroutine was added.  This fourth parameter is used to denote where
to cut string to make it shorter.  chop_str can now cut at the
beginning (from the _left_ side of the string), in the middle
(_center_ of the string), or at the end (from the _right_ side of
the string); cutting from right is the default:

  chop_str(somestring, len, slop, 'left')    -&gt;  ' ...string'
  chop_str(somestring, len, slop, 'center')  -&gt;  'som ... ing'
  chop_str(somestring, len, slop, 'right')   -&gt;  'somestr... '

If you want to use default slop (default additional length), use undef
as value for third parameter to chop_str.

While at it, return from chop_str early if given string is so short
that chop_str couldn't shorten it.  Simplify also regexp used by
chop_str.  Make ellipsis (dots) stick to shortened fragment for
cutting at ends, to better see which part got shortened.

Simplify passing all arguments to chop_str in chop_and_escape_str
subroutine. This was needed to pass additional options to chop_str.

Signed-off-by: Jakub Narebski &lt;jnareb@gmail.com&gt;
Signed-off-by: Junio C Hamano &lt;gitster@pobox.com&gt;
diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl
@@ -848,32 +848,73 @@ sub project_in_list {
 ## ----------------------------------------------------------------------
 ## HTML aware string manipulation
 
+# Try to chop given string on a word boundary between position
+# $len and $len+$add_len. If there is no word boundary there,
+# chop at $len+$add_len. Do not chop if chopped part plus ellipsis
+# (marking chopped part) would be longer than given string.
 sub chop_str {
 	my $str = shift;
 	my $len = shift;
 	my $add_len = shift || 10;
+	my $where = shift || 'right'; # 'left' | 'center' | 'right'
 
 	# allow only $len chars, but don't cut a word if it would fit in $add_len
 	# if it doesn't fit, cut it if it's still longer than the dots we would add
-	$str =~ m/^(.{0,$len}[^ \/\-_:\.@]{0,$add_len})(.*)/;
-	my $body = $1;
-	my $tail = $2;
-	if (length($tail) > 4) {
-		$tail = " ...";
-		$body =~ s/&[^;]*$//; # remove chopped character entities
+	# remove chopped character entities entirely
+
+	# when chopping in the middle, distribute $len into left and right part
+	# return early if chopping wouldn't make string shorter
+	if ($where eq 'center') {
+		return $str if ($len + 5 >= length($str)); # filler is length 5
+		$len = int($len/2);
+	} else {
+		return $str if ($len + 4 >= length($str)); # filler is length 4
+	}
+
+	# regexps: ending and beginning with word part up to $add_len
+	my $endre = qr/.{$len}\w{0,$add_len}/;
+	my $begre = qr/\w{0,$add_len}.{$len}/;
+
+	if ($where eq 'left') {
+		$str =~ m/^(.*?)($begre)$/;
+		my ($lead, $body) = ($1, $2);
+		if (length($lead) > 4) {
+			$body =~ s/^[^;]*;// if ($lead =~ m/&[^;]*$/);
+			$lead = " ...";
+		}
+		return "$lead$body";
+
+	} elsif ($where eq 'center') {
+		$str =~ m/^($endre)(.*)$/;
+		my ($left, $str)  = ($1, $2);
+		$str =~ m/^(.*?)($begre)$/;
+		my ($mid, $right) = ($1, $2);
+		if (length($mid) > 5) {
+			$left  =~ s/&[^;]*$//;
+			$right =~ s/^[^;]*;// if ($mid =~ m/&[^;]*$/);
+			$mid = " ... ";
+		}
+		return "$left$mid$right";
+
+	} else {
+		$str =~ m/^($endre)(.*)$/;
+		my $body = $1;
+		my $tail = $2;
+		if (length($tail) > 4) {
+			$body =~ s/&[^;]*$//;
+			$tail = "... ";
+		}
+		return "$body$tail";
 	}
-	return "$body$tail";
 }
 
 # takes the same arguments as chop_str, but also wraps a <span> around the
 # result with a title attribute if it does get chopped. Additionally, the
 # string is HTML-escaped.
 sub chop_and_escape_str {
-	my $str = shift;
-	my $len = shift;
-	my $add_len = shift || 10;
+	my ($str) = @_;
 
-	my $chopped = chop_str($str, $len, $add_len);
+	my $chopped = chop_str(@_);
 	if ($chopped eq $str) {
 		return esc_html($chopped);
 	} else {
@@ -3791,11 +3832,11 @@ sub git_search_grep_body {
 		foreach my $line (@$comment) {
 			if ($line =~ m/^(.*)($search_regexp)(.*)$/i) {
 				my ($lead, $match, $trail) = ($1, $2, $3);
-				$match = chop_str($match, 70, 5);       # in case match is very long
-				my $contextlen = int((80 - length($match))/2); # for the remainder
-				$contextlen = 30 if ($contextlen > 30); # but not too much
-				$lead  = chop_str($lead,  $contextlen, 10);
-				$trail = chop_str($trail, $contextlen, 10);
+				$match = chop_str($match, 70, 5, 'center');
+				my $contextlen = int((80 - length($match))/2);
+				$contextlen = 30 if ($contextlen > 30);
+				$lead  = chop_str($lead,  $contextlen, 10, 'left');
+				$trail = chop_str($trail, $contextlen, 10, 'right');
 
 				$lead  = esc_html($lead);
 				$match = esc_html($match);