@@ -23,6 +23,7 @@ import (
2323
2424 "github.com/apache/arrow/go/v10/arrow"
2525 "github.com/apache/arrow/go/v10/arrow/array"
26+ "github.com/apache/arrow/go/v10/arrow/bitutil"
2627 "github.com/apache/arrow/go/v10/arrow/compute/internal/exec"
2728 "github.com/apache/arrow/go/v10/arrow/compute/internal/kernels"
2829)
@@ -150,6 +151,156 @@ func CastFromExtension(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.Exec
150151 return nil
151152}
152153
154+ func CastList [SrcOffsetT , DestOffsetT int32 | int64 ](ctx * exec.KernelCtx , batch * exec.ExecSpan , out * exec.ExecResult ) error {
155+ var (
156+ opts = ctx .State .(kernels.CastState )
157+ childType = out .Type .(arrow.NestedType ).Fields ()[0 ].Type
158+ input = & batch .Values [0 ].Array
159+ offsets = exec .GetSpanOffsets [SrcOffsetT ](input , 1 )
160+ isDowncast = kernels .SizeOf [SrcOffsetT ]() > kernels .SizeOf [DestOffsetT ]()
161+ )
162+
163+ out .Buffers [0 ] = input .Buffers [0 ]
164+ out .Buffers [1 ] = input .Buffers [1 ]
165+
166+ if input .Offset != 0 && len (input .Buffers [0 ].Buf ) > 0 {
167+ out .Buffers [0 ].WrapBuffer (ctx .AllocateBitmap (input .Len ))
168+ bitutil .CopyBitmap (input .Buffers [0 ].Buf , int (input .Offset ), int (input .Len ),
169+ out .Buffers [0 ].Buf , 0 )
170+ }
171+
172+ // Handle list offsets
173+ // Several cases possible:
174+ // - The source offset is non-zero, in which case we slice the
175+ // underlying values and shift the list offsets (regardless of
176+ // their respective types)
177+ // - the source offset is zero but the source and destination types
178+ // have different list offset types, in which case we cast the offsets
179+ // - otherwise we simply keep the original offsets
180+ if isDowncast {
181+ if offsets [input .Len ] > SrcOffsetT (kernels .MaxOf [DestOffsetT ]()) {
182+ return fmt .Errorf ("%w: array of type %s too large to convert to %s" ,
183+ arrow .ErrInvalid , input .Type , out .Type )
184+ }
185+ }
186+
187+ values := input .Children [0 ].MakeArray ()
188+ defer values .Release ()
189+
190+ if input .Offset != 0 {
191+ out .Buffers [1 ].WrapBuffer (
192+ ctx .Allocate (out .Type .(arrow.OffsetsDataType ).
193+ OffsetTypeTraits ().BytesRequired (int (input .Len ) + 1 )))
194+
195+ shiftedOffsets := exec .GetSpanOffsets [DestOffsetT ](out , 1 )
196+ for i := 0 ; i < int (input .Len )+ 1 ; i ++ {
197+ shiftedOffsets [i ] = DestOffsetT (offsets [i ] - offsets [0 ])
198+ }
199+
200+ values = array .NewSlice (values , int64 (offsets [0 ]), int64 (offsets [input .Len ]))
201+ defer values .Release ()
202+ } else if kernels .SizeOf [SrcOffsetT ]() != kernels .SizeOf [DestOffsetT ]() {
203+ out .Buffers [1 ].WrapBuffer (ctx .Allocate (out .Type .(arrow.OffsetsDataType ).
204+ OffsetTypeTraits ().BytesRequired (int (input .Len ) + 1 )))
205+
206+ kernels .DoStaticCast (exec .GetSpanOffsets [SrcOffsetT ](input , 1 ),
207+ exec .GetSpanOffsets [DestOffsetT ](out , 1 ))
208+ }
209+
210+ // handle values
211+ opts .ToType = childType
212+
213+ castedValues , err := CastArray (ctx .Ctx , values , & opts )
214+ if err != nil {
215+ return err
216+ }
217+ defer castedValues .Release ()
218+
219+ out .Children = make ([]exec.ArraySpan , 1 )
220+ out .Children [0 ].SetMembers (castedValues .Data ())
221+ for i , b := range out .Children [0 ].Buffers {
222+ if b .Owner != nil && b .Owner != values .Data ().Buffers ()[i ] {
223+ b .Owner .Retain ()
224+ b .SelfAlloc = true
225+ }
226+ }
227+ return nil
228+ }
229+
230+ func CastStruct (ctx * exec.KernelCtx , batch * exec.ExecSpan , out * exec.ExecResult ) error {
231+ var (
232+ opts = ctx .State .(kernels.CastState )
233+ inType = batch .Values [0 ].Array .Type .(* arrow.StructType )
234+ outType = out .Type .(* arrow.StructType )
235+ inFieldCount = len (inType .Fields ())
236+ outFieldCount = len (outType .Fields ())
237+ )
238+
239+ fieldsToSelect := make ([]int , outFieldCount )
240+ for i := range fieldsToSelect {
241+ fieldsToSelect [i ] = - 1
242+ }
243+
244+ outFieldIndex := 0
245+ for inFieldIndex := 0 ; inFieldIndex < inFieldCount && outFieldIndex < outFieldCount ; inFieldIndex ++ {
246+ inField := inType .Field (inFieldIndex )
247+ outField := outType .Field (outFieldIndex )
248+ if inField .Name == outField .Name {
249+ if inField .Nullable && ! outField .Nullable {
250+ return fmt .Errorf ("%w: cannot cast nullable field to non-nullable field: %s %s" ,
251+ arrow .ErrType , inType , outType )
252+ }
253+ fieldsToSelect [outFieldIndex ] = inFieldIndex
254+ outFieldIndex ++
255+ }
256+ }
257+
258+ if outFieldIndex < outFieldCount {
259+ return fmt .Errorf ("%w: struct fields don't match or are in the wrong order: Input: %s Output: %s" ,
260+ arrow .ErrType , inType , outType )
261+ }
262+
263+ input := & batch .Values [0 ].Array
264+ if len (input .Buffers [0 ].Buf ) > 0 {
265+ out .Buffers [0 ].WrapBuffer (ctx .AllocateBitmap (input .Len ))
266+ bitutil .CopyBitmap (input .Buffers [0 ].Buf , int (input .Offset ), int (input .Len ),
267+ out .Buffers [0 ].Buf , 0 )
268+ }
269+
270+ out .Children = make ([]exec.ArraySpan , outFieldCount )
271+ for outFieldIndex , idx := range fieldsToSelect {
272+ values := input .Children [idx ].MakeArray ()
273+ defer values .Release ()
274+ values = array .NewSlice (values , input .Offset , input .Len )
275+ defer values .Release ()
276+
277+ opts .ToType = outType .Field (outFieldIndex ).Type
278+ castedValues , err := CastArray (ctx .Ctx , values , & opts )
279+ if err != nil {
280+ return err
281+ }
282+ defer castedValues .Release ()
283+
284+ out .Children [outFieldIndex ].TakeOwnership (castedValues .Data ())
285+ }
286+ return nil
287+ }
288+
289+ func addListCast [SrcOffsetT , DestOffsetT int32 | int64 ](fn * castFunction , inType arrow.Type ) error {
290+ kernel := exec .NewScalarKernel ([]exec.InputType {exec .NewIDInput (inType )},
291+ kernels .OutputTargetType , CastList [SrcOffsetT , DestOffsetT ], nil )
292+ kernel .NullHandling = exec .NullComputedNoPrealloc
293+ kernel .MemAlloc = exec .MemNoPrealloc
294+ return fn .AddTypeCast (inType , kernel )
295+ }
296+
297+ func addStructToStructCast (fn * castFunction ) error {
298+ kernel := exec .NewScalarKernel ([]exec.InputType {exec .NewIDInput (arrow .STRUCT )},
299+ kernels .OutputTargetType , CastStruct , nil )
300+ kernel .NullHandling = exec .NullComputedNoPrealloc
301+ return fn .AddTypeCast (arrow .STRUCT , kernel )
302+ }
303+
153304func addCastFuncs (fn []* castFunction ) {
154305 for _ , f := range fn {
155306 f .AddNewTypeCast (arrow .EXTENSION , []exec.InputType {exec .NewIDInput (arrow .EXTENSION )},
@@ -165,6 +316,12 @@ func initCastTable() {
165316 addCastFuncs (getNumericCasts ())
166317 addCastFuncs (getBinaryLikeCasts ())
167318 addCastFuncs (getTemporalCasts ())
319+ addCastFuncs (getNestedCasts ())
320+
321+ nullToExt := newCastFunction ("cast_extension" , arrow .EXTENSION )
322+ nullToExt .AddNewTypeCast (arrow .NULL , []exec.InputType {exec .NewExactInput (arrow .Null )},
323+ kernels .OutputTargetType , kernels .CastFromNull , exec .NullComputedNoPrealloc , exec .MemNoPrealloc )
324+ castTable [arrow .EXTENSION ] = nullToExt
168325}
169326
170327func getCastFunction (to arrow.DataType ) (* castFunction , error ) {
@@ -178,6 +335,51 @@ func getCastFunction(to arrow.DataType) (*castFunction, error) {
178335 return nil , fmt .Errorf ("%w: unsupported cast to %s" , arrow .ErrNotImplemented , to )
179336}
180337
338+ func getNestedCasts () []* castFunction {
339+ out := make ([]* castFunction , 0 )
340+
341+ addKernels := func (fn * castFunction , kernels []exec.ScalarKernel ) {
342+ for _ , k := range kernels {
343+ if err := fn .AddTypeCast (k .Signature .InputTypes [0 ].MatchID (), k ); err != nil {
344+ panic (err )
345+ }
346+ }
347+ }
348+
349+ castLists := newCastFunction ("cast_list" , arrow .LIST )
350+ addKernels (castLists , kernels .GetCommonCastKernels (arrow .LIST , kernels .OutputTargetType ))
351+ if err := addListCast [int32 , int32 ](castLists , arrow .LIST ); err != nil {
352+ panic (err )
353+ }
354+ if err := addListCast [int64 , int32 ](castLists , arrow .LARGE_LIST ); err != nil {
355+ panic (err )
356+ }
357+ out = append (out , castLists )
358+
359+ castLargeLists := newCastFunction ("cast_large_list" , arrow .LARGE_LIST )
360+ addKernels (castLargeLists , kernels .GetCommonCastKernels (arrow .LARGE_LIST , kernels .OutputTargetType ))
361+ if err := addListCast [int32 , int64 ](castLargeLists , arrow .LIST ); err != nil {
362+ panic (err )
363+ }
364+ if err := addListCast [int64 , int64 ](castLargeLists , arrow .LARGE_LIST ); err != nil {
365+ panic (err )
366+ }
367+ out = append (out , castLargeLists )
368+
369+ castFsl := newCastFunction ("cast_fixed_size_list" , arrow .FIXED_SIZE_LIST )
370+ addKernels (castFsl , kernels .GetCommonCastKernels (arrow .FIXED_SIZE_LIST , kernels .OutputTargetType ))
371+ out = append (out , castFsl )
372+
373+ castStruct := newCastFunction ("cast_struct" , arrow .STRUCT )
374+ addKernels (castStruct , kernels .GetCommonCastKernels (arrow .STRUCT , kernels .OutputTargetType ))
375+ if err := addStructToStructCast (castStruct ); err != nil {
376+ panic (err )
377+ }
378+ out = append (out , castStruct )
379+
380+ return out
381+ }
382+
181383func getBooleanCasts () []* castFunction {
182384 fn := newCastFunction ("cast_boolean" , arrow .BOOL )
183385 kns := kernels .GetBooleanCastKernels ()
0 commit comments