Skip to content

Commit fcbcccb

Browse files
anson627 authored and sbinet committed
ARROW-3680: [Go] implement Float16 array
Author: Anson Qian <anson627@gmail.com>
Author: Anson Qian <abq@uber.com>

Closes apache#4083 from anson627/ARROW-3680 and squashes the following commits:

ab5098f <Anson Qian> Define float16.Num as struct
b970168 <Anson Qian> Rename NewFloat16 to New
4e7a837 <Anson Qian> Add test for float16
d2a9fde <Anson Qian> Rename to float16.Num
df2645c <Anson Qian> Fix go.sum
b4fd8bf <Anson Qian> Address code review
70453dc <Anson Qian> Address code review
c19f906 <Anson Qian> Address code reviews
48f5446 <Anson Qian> Address code review
685a6f7 <Anson Qian> Move float16 to its own package
7cef978 <Anson Qian> Support both float16 and float32 version append and value/s
6f637ca <Anson Qian> Refactoring
744c0ae <Anson Qian> Fix header and typo
d8d9b10 <Anson Qian> Fix format
f9d7bfd <Anson Qian> ARROW-3680 implement Float16 array
1 parent a7dadb3 commit fcbcccb

10 files changed

Lines changed: 551 additions & 4 deletions

File tree

go/arrow/array/array.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ func init() {
174174
arrow.INT32: func(data *Data) Interface { return NewInt32Data(data) },
175175
arrow.UINT64: func(data *Data) Interface { return NewUint64Data(data) },
176176
arrow.INT64: func(data *Data) Interface { return NewInt64Data(data) },
177-
arrow.HALF_FLOAT: unsupportedArrayType,
177+
arrow.FLOAT16: func(data *Data) Interface { return NewFloat16Data(data) },
178178
arrow.FLOAT32: func(data *Data) Interface { return NewFloat32Data(data) },
179179
arrow.FLOAT64: func(data *Data) Interface { return NewFloat64Data(data) },
180180
arrow.STRING: func(data *Data) Interface { return NewStringData(data) },

go/arrow/array/builder.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,8 @@ func newBuilder(mem memory.Allocator, dtype arrow.DataType) Builder {
230230
return NewUint64Builder(mem)
231231
case arrow.INT64:
232232
return NewInt64Builder(mem)
233-
case arrow.HALF_FLOAT:
233+
case arrow.FLOAT16:
234+
return NewFloat16Builder(mem)
234235
case arrow.FLOAT32:
235236
return NewFloat32Builder(mem)
236237
case arrow.FLOAT64:

go/arrow/array/float16.go

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing, software
12+
// distributed under the License is distributed on an "AS IS" BASIS,
13+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
// See the License for the specific language governing permissions and
15+
// limitations under the License.
16+
17+
package array
18+
19+
import (
20+
"fmt"
21+
"strings"
22+
23+
"github.com/apache/arrow/go/arrow"
24+
"github.com/apache/arrow/go/arrow/float16"
25+
)
26+
27+
// A type which represents an immutable sequence of Float16 values.
28+
type Float16 struct {
29+
array
30+
values []float16.Num
31+
}
32+
33+
func NewFloat16Data(data *Data) *Float16 {
34+
a := &Float16{}
35+
a.refCount = 1
36+
a.setData(data)
37+
return a
38+
}
39+
40+
// Value returns the element stored at index i. The validity bitmap is not
// consulted, so callers that care about nulls should check IsNull first.
func (a *Float16) Value(i int) float16.Num {
	return a.values[i]
}
41+
42+
// Values returns the array's value slice as-is; no copy is made.
func (a *Float16) Values() []float16.Num {
	return a.values
}
43+
44+
func (a *Float16) String() string {
45+
o := new(strings.Builder)
46+
o.WriteString("[")
47+
for i := 0; i < a.Len(); i++ {
48+
if i > 0 {
49+
fmt.Fprintf(o, " ")
50+
}
51+
switch {
52+
case a.IsNull(i):
53+
o.WriteString("(null)")
54+
default:
55+
fmt.Fprintf(o, "%v", a.values[i].Float32())
56+
}
57+
}
58+
o.WriteString("]")
59+
return o.String()
60+
}
61+
62+
func (a *Float16) setData(data *Data) {
63+
a.array.setData(data)
64+
vals := data.buffers[1]
65+
if vals != nil {
66+
a.values = arrow.Float16Traits.CastFromBytes(vals.Bytes())
67+
beg := a.array.data.offset
68+
end := beg + a.array.data.length
69+
a.values = a.values[beg:end]
70+
}
71+
}
72+
73+
var (
74+
_ Interface = (*Float16)(nil)
75+
)

go/arrow/array/float16_builder.go

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing, software
12+
// distributed under the License is distributed on an "AS IS" BASIS,
13+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
// See the License for the specific language governing permissions and
15+
// limitations under the License.
16+
17+
package array
18+
19+
import (
20+
"sync/atomic"
21+
22+
"github.com/apache/arrow/go/arrow"
23+
"github.com/apache/arrow/go/arrow/float16"
24+
"github.com/apache/arrow/go/arrow/internal/bitutil"
25+
"github.com/apache/arrow/go/arrow/internal/debug"
26+
"github.com/apache/arrow/go/arrow/memory"
27+
)
28+
29+
type Float16Builder struct {
30+
builder
31+
32+
data *memory.Buffer
33+
rawData []float16.Num
34+
}
35+
36+
func NewFloat16Builder(mem memory.Allocator) *Float16Builder {
37+
return &Float16Builder{builder: builder{refCount: 1, mem: mem}}
38+
}
39+
40+
// Release decreases the reference count by 1.
41+
// When the reference count goes to zero, the memory is freed.
42+
func (b *Float16Builder) Release() {
43+
debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")
44+
45+
if atomic.AddInt64(&b.refCount, -1) == 0 {
46+
if b.nullBitmap != nil {
47+
b.nullBitmap.Release()
48+
b.nullBitmap = nil
49+
}
50+
if b.data != nil {
51+
b.data.Release()
52+
b.data = nil
53+
b.rawData = nil
54+
}
55+
}
56+
}
57+
58+
func (b *Float16Builder) Append(v float16.Num) {
59+
b.Reserve(1)
60+
b.UnsafeAppend(v)
61+
}
62+
63+
func (b *Float16Builder) UnsafeAppend(v float16.Num) {
64+
bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
65+
b.rawData[b.length] = v
66+
b.length++
67+
}
68+
69+
func (b *Float16Builder) AppendNull() {
70+
b.Reserve(1)
71+
b.UnsafeAppendBoolToBitmap(false)
72+
}
73+
74+
func (b *Float16Builder) UnsafeAppendBoolToBitmap(isValid bool) {
75+
if isValid {
76+
bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
77+
} else {
78+
b.nulls++
79+
}
80+
b.length++
81+
}
82+
83+
// AppendValues will append the values in the v slice. The valid slice determines which values
84+
// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty,
85+
// all values in v are appended and considered valid.
86+
func (b *Float16Builder) AppendValues(v []float16.Num, valid []bool) {
87+
if len(v) != len(valid) && len(valid) != 0 {
88+
panic("len(v) != len(valid) && len(valid) != 0")
89+
}
90+
91+
if len(v) == 0 {
92+
return
93+
}
94+
95+
b.Reserve(len(v))
96+
if len(v) > 0 {
97+
arrow.Float16Traits.Copy(b.rawData[b.length:], v)
98+
}
99+
b.builder.unsafeAppendBoolsToBitmap(valid, len(v))
100+
}
101+
102+
func (b *Float16Builder) init(capacity int) {
103+
b.builder.init(capacity)
104+
105+
b.data = memory.NewResizableBuffer(b.mem)
106+
bytesN := arrow.Uint16Traits.BytesRequired(capacity)
107+
b.data.Resize(bytesN)
108+
b.rawData = arrow.Float16Traits.CastFromBytes(b.data.Bytes())
109+
}
110+
111+
// Reserve ensures there is enough space for appending n elements
112+
// by checking the capacity and calling Resize if necessary.
113+
func (b *Float16Builder) Reserve(n int) {
114+
b.builder.reserve(n, b.Resize)
115+
}
116+
117+
// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
118+
// additional memory will be allocated. If n is smaller, the allocated memory may reduced.
119+
func (b *Float16Builder) Resize(n int) {
120+
nBuilder := n
121+
if n < minBuilderCapacity {
122+
n = minBuilderCapacity
123+
}
124+
125+
if b.capacity == 0 {
126+
b.init(n)
127+
} else {
128+
b.builder.resize(nBuilder, b.init)
129+
b.data.Resize(arrow.Float16Traits.BytesRequired(n))
130+
b.rawData = arrow.Float16Traits.CastFromBytes(b.data.Bytes())
131+
}
132+
}
133+
134+
// NewArray creates a Float16 array from the memory buffers used by the builder and resets the Float16Builder
135+
// so it can be used to build a new array.
136+
func (b *Float16Builder) NewArray() Interface {
137+
return b.NewFloat16Array()
138+
}
139+
140+
// NewFloat16Array creates a Float16 array from the memory buffers used by the builder and resets the Float16Builder
141+
// so it can be used to build a new array.
142+
func (b *Float16Builder) NewFloat16Array() (a *Float16) {
143+
data := b.newData()
144+
a = NewFloat16Data(data)
145+
data.Release()
146+
return
147+
}
148+
149+
func (b *Float16Builder) newData() (data *Data) {
150+
bytesRequired := arrow.Float16Traits.BytesRequired(b.length)
151+
if bytesRequired > 0 && bytesRequired < b.data.Len() {
152+
// trim buffers
153+
b.data.Resize(bytesRequired)
154+
}
155+
data = NewData(arrow.FixedWidthTypes.Float16, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
156+
b.reset()
157+
158+
if b.data != nil {
159+
b.data.Release()
160+
b.data = nil
161+
b.rawData = nil
162+
}
163+
164+
return
165+
}
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing, software
12+
// distributed under the License is distributed on an "AS IS" BASIS,
13+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
// See the License for the specific language governing permissions and
15+
// limitations under the License.
16+
17+
package array_test
18+
19+
import (
20+
"testing"
21+
22+
"github.com/apache/arrow/go/arrow/array"
23+
"github.com/apache/arrow/go/arrow/float16"
24+
"github.com/apache/arrow/go/arrow/memory"
25+
"github.com/stretchr/testify/assert"
26+
)
27+
28+
func float32Values(a *array.Float16) []float32 {
29+
values := make([]float32, a.Len())
30+
for i, v := range a.Values() {
31+
values[i] = v.Float32()
32+
}
33+
return values
34+
}
35+
36+
func TestNewFloat16Builder(t *testing.T) {
37+
mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
38+
defer mem.AssertSize(t, 0)
39+
40+
ab := array.NewFloat16Builder(mem)
41+
42+
ab.Append(float16.New(1))
43+
ab.Append(float16.New(2))
44+
ab.Append(float16.New(3))
45+
ab.AppendNull()
46+
ab.Append(float16.New(5))
47+
ab.Append(float16.New(6))
48+
ab.AppendNull()
49+
ab.Append(float16.New(8))
50+
ab.Append(float16.New(9))
51+
ab.Append(float16.New(10))
52+
53+
// check state of builder before NewFloat16Array
54+
assert.Equal(t, 10, ab.Len(), "unexpected Len()")
55+
assert.Equal(t, 2, ab.NullN(), "unexpected NullN()")
56+
57+
a := ab.NewFloat16Array()
58+
59+
// check state of builder after NewFloat16Array
60+
assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewFloat16Array did not reset state")
61+
assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewFloat16Array did not reset state")
62+
assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewFloat16Array did not reset state")
63+
64+
// check state of array
65+
assert.Equal(t, 2, a.NullN(), "unexpected null count")
66+
67+
assert.Equal(t, []float32{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, float32Values(a), "unexpected Float16Values")
68+
assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity
69+
assert.Len(t, a.Values(), 10, "unexpected length of Float16Values")
70+
71+
a.Release()
72+
ab.Append(float16.New(7))
73+
ab.Append(float16.New(8))
74+
75+
a = ab.NewFloat16Array()
76+
77+
assert.Equal(t, 0, a.NullN())
78+
assert.Equal(t, []float32{7, 8}, float32Values(a))
79+
assert.Len(t, a.Values(), 2)
80+
81+
a.Release()
82+
}
83+
84+
func TestFloat16Builder_Empty(t *testing.T) {
85+
mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
86+
defer mem.AssertSize(t, 0)
87+
88+
ab := array.NewFloat16Builder(mem)
89+
defer ab.Release()
90+
91+
want := []float16.Num{float16.New(3), float16.New(4)}
92+
93+
ab.AppendValues([]float16.Num{}, nil)
94+
a := ab.NewFloat16Array()
95+
assert.Zero(t, a.Len())
96+
a.Release()
97+
98+
ab.AppendValues(nil, nil)
99+
a = ab.NewFloat16Array()
100+
assert.Zero(t, a.Len())
101+
a.Release()
102+
103+
ab.AppendValues(want, nil)
104+
a = ab.NewFloat16Array()
105+
assert.Equal(t, want, a.Values())
106+
a.Release()
107+
108+
ab.AppendValues([]float16.Num{}, nil)
109+
ab.AppendValues(want, nil)
110+
a = ab.NewFloat16Array()
111+
assert.Equal(t, want, a.Values())
112+
a.Release()
113+
114+
ab.AppendValues(want, nil)
115+
ab.AppendValues([]float16.Num{}, nil)
116+
a = ab.NewFloat16Array()
117+
assert.Equal(t, want, a.Values())
118+
a.Release()
119+
}

go/arrow/datatype.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ const (
5353
// INT64 is a Signed 64-bit little-endian integer
5454
INT64
5555

56-
// HALF_FLOAT is a 2-byte floating point value
57-
HALF_FLOAT
56+
// FLOAT16 is a 2-byte floating point value
57+
FLOAT16
5858

5959
// FLOAT32 is a 4-byte floating point value
6060
FLOAT32

0 commit comments

Comments
 (0)