Skip to content

Commit ee58ecc

Browse files
committed
internal/bytealg: move short string Index implementations into bytealg
Also move the arm64 CountByte implementation while we're here.

Fixes golang#19792

Change-Id: I1e0fdf1e03e3135af84150a2703b58dad1b0d57e
Reviewed-on: https://go-review.googlesource.com/98518
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
1 parent f6332bb commit ee58ecc

27 files changed

+932
-1123
lines changed

src/bytes/bytes.go

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -829,6 +829,92 @@ func EqualFold(s, t []byte) bool {
829829
return len(s) == len(t)
830830
}
831831

832+
// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
//
// The strategy is chosen from n = len(sep):
//   - n == 0: every slice contains the empty separator at index 0.
//   - n == 1: delegate to IndexByte.
//   - n == len(s): a single Equal comparison decides the answer.
//   - n > len(s): sep cannot fit, so the result is -1.
//   - n <= bytealg.MaxLen: use bytealg.Index, either directly (when
//     len(s) <= bytealg.MaxBruteForce) or as the fallback of an IndexByte scan.
//   - otherwise: IndexByte scan with indexRabinKarp as the fallback.
//
// In both scans, IndexByte locates the next occurrence of sep[0], Equal
// verifies the candidate, and fails counts candidates that did not match.
833+
func Index(s, sep []byte) int {
834+
n := len(sep)
835+
switch {
836+
case n == 0:
837+
return 0
838+
case n == 1:
839+
return IndexByte(s, sep[0])
840+
case n == len(s):
841+
if Equal(sep, s) {
842+
return 0
843+
}
844+
return -1
845+
case n > len(s):
846+
return -1
847+
// From here on 2 <= n < len(s), because the cases above returned otherwise.
// NOTE(review): bytealg.MaxLen is presumably the longest sep that
// bytealg.Index supports on this platform — confirm in internal/bytealg.
case n <= bytealg.MaxLen:
848+
// Use brute force when s and sep both are small
849+
if len(s) <= bytealg.MaxBruteForce {
850+
return bytealg.Index(s, sep)
851+
}
852+
// c is the first byte of sep; IndexByte searches for it to find candidates.
c := sep[0]
853+
i := 0
854+
// t is the prefix of s holding every index at which a match of length n
// can start (the last valid start is len(s)-n).
t := s[:len(s)-n+1]
855+
// fails counts candidate positions whose first byte matched but whose
// full comparison against sep did not.
fails := 0
856+
for i < len(t) {
857+
if t[i] != c {
858+
// IndexByte is faster than bytealg.Index, so use it as long as
859+
// we're not getting lots of false positives.
860+
// Searching t rather than s keeps the candidate within the valid start range.
o := IndexByte(t[i:], c)
861+
if o < 0 {
862+
return -1
863+
}
864+
i += o
865+
}
866+
if Equal(s[i:i+n], sep) {
867+
return i
868+
}
869+
fails++
870+
i++
871+
// Switch to bytealg.Index when IndexByte produces too many false positives.
872+
// NOTE(review): unlike the Rabin-Karp cutover further down, there is no
// i < len(t) guard here, so when i == len(t) the slice s[i:] holds only
// n-1 bytes; presumably bytealg.Index tolerates a haystack shorter than
// sep — confirm.
if fails > bytealg.Cutover(i) {
873+
r := bytealg.Index(s[i:], sep)
874+
if r >= 0 {
875+
// r is relative to s[i:]; add i to get the index within s.
return r + i
876+
}
877+
return -1
878+
}
879+
}
880+
return -1
881+
}
882+
// sep is longer than bytealg.MaxLen: scan for the first byte with IndexByte
// as above, falling back to indexRabinKarp instead of bytealg.Index.
c := sep[0]
883+
i := 0
884+
fails := 0
885+
// t holds every valid match start position, as in the case above.
t := s[:len(s)-n+1]
886+
for i < len(t) {
887+
if t[i] != c {
888+
o := IndexByte(t[i:], c)
889+
if o < 0 {
890+
// No further occurrence of c; fall through to the final return -1.
break
891+
}
892+
i += o
893+
}
894+
if Equal(s[i:i+n], sep) {
895+
return i
896+
}
897+
i++
898+
fails++
899+
// In Go, >> binds tighter than +, so the threshold is 4 + (i >> 4), i.e.
// 4 + i/16. The i < len(t) condition guarantees that s[i:] still holds at
// least n bytes when indexRabinKarp is called.
if fails >= 4+i>>4 && i < len(t) {
900+
// Give up on IndexByte, it isn't skipping ahead
901+
// far enough to be better than Rabin-Karp.
902+
// Experiments (using IndexPeriodic) suggest
903+
// the cutover is about 16 byte skips.
904+
// TODO: if large prefixes of sep are matching
905+
// we should cutover at even larger average skips,
906+
// because Equal becomes that much more expensive.
907+
// This code does not take that effect into account.
908+
j := indexRabinKarp(s[i:], sep)
909+
if j < 0 {
910+
return -1
911+
}
912+
// j is relative to s[i:]; add i to get the index within s.
return i + j
913+
}
914+
}
915+
return -1
916+
}
917+
832918
func indexRabinKarp(s, sep []byte) int {
833919
// Rabin-Karp search
834920
hashsep, pow := hashStr(sep)

src/bytes/bytes_amd64.go

Lines changed: 0 additions & 79 deletions
This file was deleted.

src/bytes/bytes_arm64.go

Lines changed: 0 additions & 72 deletions
This file was deleted.

src/bytes/bytes_generic.go

Lines changed: 0 additions & 59 deletions
This file was deleted.

src/bytes/bytes_s390x.go

Lines changed: 0 additions & 80 deletions
This file was deleted.

0 commit comments

Comments
 (0)