Skip to content

Commit e2d2e38

Browse files
committed
Merge branch 'jc/maint-1.6.4-grep-lookahead' into jc/maint-grep-lookahead
* jc/maint-1.6.4-grep-lookahead: grep: optimize built-in grep by skipping lines that do not hit. This needs to be an evil merge, as fixmatch() changed signature since 5183bf6 (grep: Allow case insensitive search of fixed-strings, 2009-11-06). Signed-off-by: Junio C Hamano <gitster@pobox.com>
2 parents 902f235 + a26345b commit e2d2e38

File tree

1 file changed

+75
-0
lines changed

1 file changed

+75
-0
lines changed

grep.c

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,65 @@ static void show_pre_context(struct grep_opt *opt, const char *name, char *buf,
615615
}
616616
}
617617

618+
/*
 * Decide whether the look_ahead() shortcut may be used for this set
 * of grep options.
 *
 * Returns 1 only when all of the following hold, and 0 otherwise:
 *  - opt->extended is not set,
 *  - opt->invert is not set,
 *  - every entry on opt->pattern_list has token == GREP_PATTERN.
 *
 * An empty pattern list also yields 1, because the loop body never runs.
 */
static int should_lookahead(struct grep_opt *opt)
619+
{
620+
struct grep_pat *p;
621+
622+
if (opt->extended)
623+
return 0; /* punt for too complex stuff */
624+
if (opt->invert)
625+
return 0; /* NOTE(review): presumably because with inverted matching the non-hitting lines are the ones of interest, so they cannot be skipped -- confirm */
626+
for (p = opt->pattern_list; p; p = p->next) {
627+
if (p->token != GREP_PATTERN)
628+
return 0; /* punt for "header only" and stuff */
629+
}
630+
return 1;
631+
}
632+
633+
/*
 * Skip forward to the line containing the earliest match of any pattern.
 *
 * opt    - options; every entry on opt->pattern_list is tried.
 * left_p - in/out: number of bytes remaining from *bol_p.
 * lno_p  - in/out: line number of the line starting at *bol_p.
 * bol_p  - in/out: start of the current line.
 *
 * Returns 1 when no pattern matches anywhere from *bol_p on; in that
 * case *bol_p is advanced by *left_p and *left_p is set to 0 (*lno_p is
 * left untouched).  Returns 0 otherwise, after moving *bol_p to the
 * start of the line holding the earliest match, reducing *left_p by the
 * number of bytes skipped and bumping *lno_p by the number of newlines
 * skipped.
 *
 * NOTE(review): neither fixmatch() nor regexec() is passed *left_p, so
 * the search presumably relies on the buffer being NUL-terminated --
 * confirm against the caller.
 */
static int look_ahead(struct grep_opt *opt,
634+
unsigned long *left_p,
635+
unsigned *lno_p,
636+
char **bol_p)
637+
{
638+
unsigned lno = *lno_p;
639+
char *bol = *bol_p;
640+
struct grep_pat *p;
641+
char *sp, *last_bol;
642+
regoff_t earliest = -1; /* smallest match start offset seen so far; -1 means no match yet */
643+
644+
for (p = opt->pattern_list; p; p = p->next) {
645+
int hit;
646+
regmatch_t m;
647+
648+
if (p->fixed)
649+
hit = !fixmatch(p->pattern, bol, p->ignore_case, &m);
650+
else
651+
hit = !regexec(&p->regexp, bol, 1, &m, 0);
652+
if (!hit || m.rm_so < 0 || m.rm_eo < 0)
653+
continue; /* no usable match position for this pattern */
654+
if (earliest < 0 || m.rm_so < earliest)
655+
earliest = m.rm_so;
656+
}
657+
658+
if (earliest < 0) {
659+
*bol_p = bol + *left_p; /* nothing matched: consume all that is left */
660+
*left_p = 0;
661+
return 1;
662+
}
663+
for (sp = bol + earliest; bol < sp && sp[-1] != '\n'; sp--)
664+
; /* find the beginning of the line */
665+
last_bol = sp;
666+
667+
/* count the newlines we are about to skip to keep the line number right */
for (sp = bol; sp < last_bol; sp++) {
668+
if (*sp == '\n')
669+
lno++;
670+
}
671+
*left_p -= last_bol - bol;
672+
*bol_p = last_bol;
673+
*lno_p = lno;
674+
return 0;
675+
}
676+
618677
static int grep_buffer_1(struct grep_opt *opt, const char *name,
619678
char *buf, unsigned long size, int collect_hits)
620679
{
@@ -624,6 +683,7 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
624683
unsigned last_hit = 0;
625684
int binary_match_only = 0;
626685
unsigned count = 0;
686+
int try_lookahead = 0;
627687
enum grep_context ctx = GREP_CONTEXT_HEAD;
628688
xdemitconf_t xecfg;
629689

@@ -652,11 +712,26 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
652712
opt->priv = &xecfg;
653713
}
654714
}
715+
try_lookahead = should_lookahead(opt);
655716

656717
while (left) {
657718
char *eol, ch;
658719
int hit;
659720

721+
/*
722+
* look_ahead() skips quickly to the line that possibly
723+
* has the next hit; don't call it if we need to do
724+
* something more than just skipping the current line
725+
* in response to an unmatch for the current line. E.g.
726+
* inside a post-context window, we will show the current
727+
* line as a context around the previous hit when it
728+
* doesn't hit.
729+
*/
730+
if (try_lookahead
731+
&& !(last_hit
732+
&& lno <= last_hit + opt->post_context)
733+
&& look_ahead(opt, &left, &lno, &bol))
734+
break;
660735
eol = end_of_line(bol, &left);
661736
ch = *eol;
662737
*eol = 0;

0 commit comments

Comments
 (0)