@@ -18,11 +18,14 @@ package kernels
1818
1919import (
2020 "fmt"
21+ "math"
2122
2223 "github.com/apache/arrow/go/v10/arrow"
2324 "github.com/apache/arrow/go/v10/arrow/array"
2425 "github.com/apache/arrow/go/v10/arrow/bitutil"
2526 "github.com/apache/arrow/go/v10/arrow/compute/internal/exec"
27+ "github.com/apache/arrow/go/v10/arrow/internal/debug"
28+ "github.com/apache/arrow/go/v10/arrow/memory"
2629 "github.com/apache/arrow/go/v10/internal/bitutils"
2730)
2831
@@ -87,6 +90,149 @@ func preallocateData(ctx *exec.KernelCtx, length int64, bitWidth int, allocateVa
8790 }
8891}
8992
93+ type builder [T any ] interface {
94+ array.Builder
95+ Append (T )
96+ UnsafeAppend (T )
97+ UnsafeAppendBoolToBitmap (bool )
98+ }
99+
100+ func getTakeIndices [T exec.IntTypes | exec.UintTypes ](mem memory.Allocator , filter * exec.ArraySpan , nullSelect NullSelectionBehavior ) arrow.ArrayData {
101+ var (
102+ filterData = filter .Buffers [1 ].Buf
103+ haveFilterNulls = filter .MayHaveNulls ()
104+ filterIsValid = filter .Buffers [0 ].Buf
105+ idxType = exec .GetDataType [T ]()
106+ )
107+
108+ if haveFilterNulls && nullSelect == EmitNulls {
109+ // Most complex case: the filter may have nulls and we don't drop them.
110+ // The logic is ternary:
111+ // - filter is null: emit null
112+ // - filter is valid and true: emit index
113+ // - filter is valid and false: don't emit anything
114+
115+ bldr := array .NewBuilder (mem , idxType ).(builder [T ])
116+ defer bldr .Release ()
117+
118+ // position relative to start of filter
119+ var pos T
120+ // current position taking the filter offset into account
121+ posWithOffset := filter .Offset
122+
123+ // to count blocks where filterData[i] || !filterIsValid[i]
124+ filterCounter := bitutils .NewBinaryBitBlockCounter (filterData , filterIsValid , filter .Offset , filter .Offset , filter .Len )
125+ isValidCounter := bitutils .NewBitBlockCounter (filterIsValid , filter .Offset , filter .Len )
126+ for int64 (pos ) < filter .Len {
127+ // true OR NOT valid
128+ selectedOrNullBlock := filterCounter .NextOrNotWord ()
129+ if selectedOrNullBlock .NoneSet () {
130+ pos += T (selectedOrNullBlock .Len )
131+ posWithOffset += int64 (selectedOrNullBlock .Len )
132+ continue
133+ }
134+ bldr .Reserve (int (selectedOrNullBlock .Popcnt ))
135+
136+ // if the values are all valid and the selectedOrNullBlock
137+ // is full, then we can infer that all the values are true
138+ // and skip the bit checking
139+ isValidBlock := isValidCounter .NextWord ()
140+ if selectedOrNullBlock .AllSet () && isValidBlock .AllSet () {
141+ // all the values are selected and non-null
142+ for i := 0 ; i < int (selectedOrNullBlock .Len ); i ++ {
143+ bldr .UnsafeAppend (pos )
144+ pos ++
145+ }
146+ posWithOffset += int64 (selectedOrNullBlock .Len )
147+ } else {
148+ // some of the values are false or null
149+ for i := 0 ; i < int (selectedOrNullBlock .Len ); i ++ {
150+ if bitutil .BitIsSet (filterIsValid , int (posWithOffset )) {
151+ if bitutil .BitIsSet (filterData , int (posWithOffset )) {
152+ bldr .UnsafeAppend (pos )
153+ }
154+ } else {
155+ // null slot, append null
156+ bldr .UnsafeAppendBoolToBitmap (false )
157+ }
158+ pos ++
159+ posWithOffset ++
160+ }
161+ }
162+ }
163+
164+ result := bldr .NewArray ()
165+ defer result .Release ()
166+ result .Data ().Retain ()
167+ return result .Data ()
168+ }
169+
170+ bldr := newBufferBuilder [T ](mem )
171+ if haveFilterNulls {
172+ // the filter may have nulls, so we scan the validity bitmap
173+ // and the filter data bitmap together
174+ debug .Assert (nullSelect == DropNulls , "incorrect nullselect logic" )
175+
176+ // position relative to start of the filter
177+ var pos T
178+ // current position taking the filter offset into account
179+ posWithOffset := filter .Offset
180+
181+ filterCounter := bitutils .NewBinaryBitBlockCounter (filterData , filterIsValid , filter .Offset , filter .Offset , filter .Len )
182+ for int64 (pos ) < filter .Len {
183+ andBlock := filterCounter .NextAndWord ()
184+ bldr .reserve (int (andBlock .Popcnt ))
185+ if andBlock .AllSet () {
186+ // all the values are selected and non-null
187+ for i := 0 ; i < int (andBlock .Len ); i ++ {
188+ bldr .unsafeAppend (pos )
189+ pos ++
190+ }
191+ posWithOffset += int64 (andBlock .Len )
192+ } else if ! andBlock .NoneSet () {
193+ // some values are false or null
194+ for i := 0 ; i < int (andBlock .Len ); i ++ {
195+ if bitutil .BitIsSet (filterIsValid , int (posWithOffset )) && bitutil .BitIsSet (filterData , int (posWithOffset )) {
196+ bldr .unsafeAppend (pos )
197+ }
198+ pos ++
199+ posWithOffset ++
200+ }
201+ } else {
202+ pos += T (andBlock .Len )
203+ posWithOffset += int64 (andBlock .Len )
204+ }
205+ }
206+ } else {
207+ // filter has no nulls, so we only need to look for true values
208+ bitutils .VisitSetBitRuns (filterData , filter .Offset , filter .Len ,
209+ func (pos , length int64 ) error {
210+ // append consecutive run of indices
211+ bldr .reserve (int (length ))
212+ for i := int64 (0 ); i < length ; i ++ {
213+ bldr .unsafeAppend (T (pos + i ))
214+ }
215+ return nil
216+ })
217+ }
218+
219+ length := bldr .len ()
220+ outBuf := bldr .finish ()
221+ defer outBuf .Release ()
222+ return array .NewData (idxType , length , []* memory.Buffer {nil , outBuf }, nil , 0 , 0 )
223+ }
224+
225+ func GetTakeIndices (mem memory.Allocator , filter * exec.ArraySpan , nullSelect NullSelectionBehavior ) (arrow.ArrayData , error ) {
226+ debug .Assert (filter .Type .ID () == arrow .BOOL , "filter should be a boolean array" )
227+ if filter .Len < math .MaxUint16 {
228+ return getTakeIndices [uint16 ](mem , filter , nullSelect ), nil
229+ } else if filter .Len < math .MaxUint32 {
230+ return getTakeIndices [uint32 ](mem , filter , nullSelect ), nil
231+ }
232+ return nil , fmt .Errorf ("%w: filter length exceeds UINT32_MAX, consider a different strategy for selecting elements" ,
233+ arrow .ErrNotImplemented )
234+ }
235+
90236type writeFiltered interface {
91237 OutPos () int
92238 WriteValue (int64 )
@@ -1121,6 +1267,9 @@ func VarBinaryImpl[OffsetT int32 | int64](ctx *exec.KernelCtx, batch *exec.ExecS
11211267 valOffset := rawOffsets [idx ]
11221268 valSize := rawOffsets [idx + 1 ] - valOffset
11231269
1270+ if valSize == 0 {
1271+ return nil
1272+ }
11241273 offset += valSize
11251274 if valSize > OffsetT (spaceAvail ) {
11261275 dataBuilder .reserve (int (valSize ))
@@ -1362,6 +1511,19 @@ func FilterBinary(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResul
13621511 return fmt .Errorf ("%w: invalid type for binary filter" , arrow .ErrInvalid )
13631512}
13641513
// visitNoop is a visitor callback that takes no arguments, does nothing and
// always reports success.
func visitNoop() error {
	return nil
}

// visitIdxNoop is a visitor callback that ignores the index it is given and
// always reports success.
func visitIdxNoop(_ int64) error {
	return nil
}
1516+
1517+ func StructImpl (ctx * exec.KernelCtx , batch * exec.ExecSpan , outputLength int64 , out * exec.ExecResult , fn selectionOutputFn ) error {
1518+ var (
1519+ values = & batch .Values [0 ].Array
1520+ selection = & batch .Values [1 ].Array
1521+ )
1522+
1523+ // nothing we need to do other than generate the validity bitmap
1524+ return fn (ctx , outputLength , values , selection , out , visitIdxNoop , visitNoop )
1525+ }
1526+
13651527type SelectionKernelData struct {
13661528 In exec.InputType
13671529 Exec exec.ArrayKernelExec
0 commit comments