Skip to content

Commit b075d1f

Browse files
egonelbre authored and nigeltao committed
encoding/base64: Optimize EncodeToString and DecodeString.
benchmark                   old ns/op     new ns/op     delta
BenchmarkEncodeToString     31281         23821         -23.85%
BenchmarkDecodeString       156508        82254         -47.44%

benchmark                   old MB/s     new MB/s     speedup
BenchmarkEncodeToString     261.88       343.89       1.31x
BenchmarkDecodeString       69.80        132.81       1.90x

Change-Id: I115e0b18c3a6d5ef6bfdcb3f637644f02f290907
Reviewed-on: https://go-review.googlesource.com/8808
Reviewed-by: Nigel Tao <nigeltao@golang.org>
1 parent 04829a4 commit b075d1f

File tree

1 file changed

+83
-72
lines changed

1 file changed

+83
-72
lines changed

src/encoding/base64/base64.go

Lines changed: 83 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -6,10 +6,8 @@
66
package base64
77

88
import (
9-
"bytes"
109
"io"
1110
"strconv"
12-
"strings"
1311
)
1412

1513
/*
@@ -22,7 +20,7 @@ import (
2220
// (RFC 1421). RFC 4648 also defines an alternate encoding, which is
2321
// the standard encoding with - and _ substituted for + and /.
2422
type Encoding struct {
25-
encode string
23+
encode [64]byte
2624
decodeMap [256]byte
2725
padChar rune
2826
}
@@ -40,9 +38,14 @@ const encodeURL = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz012345678
4038
// The resulting Encoding uses the default padding character ('='),
4139
// which may be changed or disabled via WithPadding.
4240
func NewEncoding(encoder string) *Encoding {
41+
if len(encoder) != 64 {
42+
panic("encoding alphabet is not 64-bytes long")
43+
}
44+
4345
e := new(Encoding)
44-
e.encode = encoder
4546
e.padChar = StdPadding
47+
copy(e.encode[:], encoder)
48+
4649
for i := 0; i < len(e.decodeMap); i++ {
4750
e.decodeMap[i] = 0xFF
4851
}
@@ -77,13 +80,6 @@ var RawStdEncoding = StdEncoding.WithPadding(NoPadding)
7780
// This is the same as URLEncoding but omits padding characters.
7881
var RawURLEncoding = URLEncoding.WithPadding(NoPadding)
7982

80-
var removeNewlinesMapper = func(r rune) rune {
81-
if r == '\r' || r == '\n' {
82-
return -1
83-
}
84-
return r
85-
}
86-
8783
/*
8884
* Encoder
8985
*/
@@ -99,46 +95,45 @@ func (enc *Encoding) Encode(dst, src []byte) {
9995
return
10096
}
10197

102-
for len(src) > 0 {
103-
var b0, b1, b2, b3 byte
98+
di, si := 0, 0
99+
n := (len(src) / 3) * 3
100+
for si < n {
101+
// Convert 3x 8bit source bytes into 4 bytes
102+
val := uint(src[si+0])<<16 | uint(src[si+1])<<8 | uint(src[si+2])
104103

105-
// Unpack 4x 6-bit source blocks into a 4 byte
106-
// destination quantum
107-
switch len(src) {
108-
default:
109-
b3 = src[2] & 0x3F
110-
b2 = src[2] >> 6
111-
fallthrough
112-
case 2:
113-
b2 |= (src[1] << 2) & 0x3F
114-
b1 = src[1] >> 4
115-
fallthrough
116-
case 1:
117-
b1 |= (src[0] << 4) & 0x3F
118-
b0 = src[0] >> 2
119-
}
104+
dst[di+0] = enc.encode[val>>18&0x3F]
105+
dst[di+1] = enc.encode[val>>12&0x3F]
106+
dst[di+2] = enc.encode[val>>6&0x3F]
107+
dst[di+3] = enc.encode[val&0x3F]
120108

121-
// Encode 6-bit blocks using the base64 alphabet
122-
dst[0] = enc.encode[b0]
123-
dst[1] = enc.encode[b1]
124-
if len(src) >= 3 {
125-
dst[2] = enc.encode[b2]
126-
dst[3] = enc.encode[b3]
127-
} else { // Final incomplete quantum
128-
if len(src) >= 2 {
129-
dst[2] = enc.encode[b2]
130-
}
131-
if enc.padChar != NoPadding {
132-
if len(src) < 2 {
133-
dst[2] = byte(enc.padChar)
134-
}
135-
dst[3] = byte(enc.padChar)
136-
}
137-
break
138-
}
109+
si += 3
110+
di += 4
111+
}
139112

140-
src = src[3:]
141-
dst = dst[4:]
113+
remain := len(src) - si
114+
if remain == 0 {
115+
return
116+
}
117+
// Add the remaining small block
118+
val := uint(src[si+0]) << 16
119+
if remain == 2 {
120+
val |= uint(src[si+1]) << 8
121+
}
122+
123+
dst[di+0] = enc.encode[val>>18&0x3F]
124+
dst[di+1] = enc.encode[val>>12&0x3F]
125+
126+
switch remain {
127+
case 2:
128+
dst[di+2] = enc.encode[val>>6&0x3F]
129+
if enc.padChar != NoPadding {
130+
dst[di+3] = byte(enc.padChar)
131+
}
132+
case 1:
133+
if enc.padChar != NoPadding {
134+
dst[di+2] = byte(enc.padChar)
135+
dst[di+3] = byte(enc.padChar)
136+
}
142137
}
143138
}
144139

@@ -248,67 +243,83 @@ func (e CorruptInputError) Error() string {
248243

249244
// decode is like Decode but returns an additional 'end' value, which
250245
// indicates if end-of-message padding or a partial quantum was encountered
251-
// and thus any additional data is an error. This method assumes that src has been
252-
// stripped of all supported whitespace ('\r' and '\n').
246+
// and thus any additional data is an error.
253247
func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) {
254-
olen := len(src)
255-
for len(src) > 0 && !end {
248+
si := 0
249+
250+
// skip over newlines
251+
for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
252+
si++
253+
}
254+
255+
for si < len(src) && !end {
256256
// Decode quantum using the base64 alphabet
257257
var dbuf [4]byte
258258
dinc, dlen := 3, 4
259259

260260
for j := range dbuf {
261-
if len(src) == 0 {
261+
if len(src) == si {
262262
if enc.padChar != NoPadding || j < 2 {
263-
return n, false, CorruptInputError(olen - len(src) - j)
263+
return n, false, CorruptInputError(si - j)
264264
}
265265
dinc, dlen, end = j-1, j, true
266266
break
267267
}
268-
in := src[0]
269-
src = src[1:]
268+
in := src[si]
269+
270+
si++
271+
// skip over newlines
272+
for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
273+
si++
274+
}
275+
270276
if rune(in) == enc.padChar {
271277
// We've reached the end and there's padding
272278
switch j {
273279
case 0, 1:
274280
// incorrect padding
275-
return n, false, CorruptInputError(olen - len(src) - 1)
281+
return n, false, CorruptInputError(si - 1)
276282
case 2:
277283
// "==" is expected, the first "=" is already consumed.
278-
if len(src) == 0 {
284+
if si == len(src) {
279285
// not enough padding
280-
return n, false, CorruptInputError(olen)
286+
return n, false, CorruptInputError(len(src))
281287
}
282-
if rune(src[0]) != enc.padChar {
288+
if rune(src[si]) != enc.padChar {
283289
// incorrect padding
284-
return n, false, CorruptInputError(olen - len(src) - 1)
290+
return n, false, CorruptInputError(si - 1)
291+
}
292+
293+
si++
294+
// skip over newlines
295+
for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
296+
si++
285297
}
286-
src = src[1:]
287298
}
288-
if len(src) > 0 {
299+
if si < len(src) {
289300
// trailing garbage
290-
err = CorruptInputError(olen - len(src))
301+
err = CorruptInputError(si)
291302
}
292303
dinc, dlen, end = 3, j, true
293304
break
294305
}
295306
dbuf[j] = enc.decodeMap[in]
296307
if dbuf[j] == 0xFF {
297-
return n, false, CorruptInputError(olen - len(src) - 1)
308+
return n, false, CorruptInputError(si - 1)
298309
}
299310
}
300311

301-
// Pack 4x 6-bit source blocks into 3 byte destination
302-
// quantum
312+
// Convert 4x 6bit source bytes into 3 bytes
313+
val := uint(dbuf[0])<<18 | uint(dbuf[1])<<12 | uint(dbuf[2])<<6 | uint(dbuf[3])
303314
switch dlen {
304315
case 4:
305-
dst[2] = dbuf[2]<<6 | dbuf[3]
316+
dst[2] = byte(val >> 0)
306317
fallthrough
307318
case 3:
308-
dst[1] = dbuf[1]<<4 | dbuf[2]>>2
319+
dst[1] = byte(val >> 8)
309320
fallthrough
310321
case 2:
311-
dst[0] = dbuf[0]<<2 | dbuf[1]>>4
322+
dst[0] = byte(val >> 16)
312323
}
313324
dst = dst[dinc:]
314325
n += dlen - 1
@@ -323,14 +334,12 @@ func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) {
323334
// number of bytes successfully written and CorruptInputError.
324335
// New line characters (\r and \n) are ignored.
325336
func (enc *Encoding) Decode(dst, src []byte) (n int, err error) {
326-
src = bytes.Map(removeNewlinesMapper, src)
327337
n, _, err = enc.decode(dst, src)
328338
return
329339
}
330340

331341
// DecodeString returns the bytes represented by the base64 string s.
332342
func (enc *Encoding) DecodeString(s string) ([]byte, error) {
333-
s = strings.Map(removeNewlinesMapper, s)
334343
dbuf := make([]byte, enc.DecodedLen(len(s)))
335344
n, _, err := enc.decode(dbuf, []byte(s))
336345
return dbuf[:n], err
@@ -359,6 +368,8 @@ func (d *decoder) Read(p []byte) (n int, err error) {
359368
return n, nil
360369
}
361370

371+
// This code assumes that d.r strips supported whitespace ('\r' and '\n').
372+
362373
// Read a chunk.
363374
nn := len(p) / 3 * 4
364375
if nn < 4 {

0 commit comments

Comments
 (0)