@@ -26,6 +26,8 @@ import (
2626 "github.com/apache/arrow/go/v9/arrow/bitutil"
2727 "github.com/apache/arrow/go/v9/arrow/internal/debug"
2828 "github.com/apache/arrow/go/v9/arrow/memory"
29+ "github.com/apache/arrow/go/v9/internal/bitutils"
30+ "github.com/apache/arrow/go/v9/internal/utils"
2931)
3032
3133// Concatenate creates a new arrow.Array which is the concatenation of the
@@ -228,6 +230,85 @@ func concatOffsets(buffers []*memory.Buffer, mem memory.Allocator) (*memory.Buff
228230 return out , valuesRanges , nil
229231}
230232
233+ func unifyDictionaries (mem memory.Allocator , data []arrow.ArrayData , dt * arrow.DictionaryType ) ([]* memory.Buffer , arrow.Array , error ) {
234+ unifier , err := NewDictionaryUnifier (mem , dt .ValueType )
235+ if err != nil {
236+ return nil , nil , err
237+ }
238+ defer unifier .Release ()
239+
240+ newLookup := make ([]* memory.Buffer , len (data ))
241+ for i , d := range data {
242+ dictArr := MakeFromData (d .Dictionary ())
243+ defer dictArr .Release ()
244+ newLookup [i ], err = unifier .UnifyAndTranspose (dictArr )
245+ if err != nil {
246+ return nil , nil , err
247+ }
248+ }
249+
250+ unified , err := unifier .GetResultWithIndexType (dt .IndexType )
251+ if err != nil {
252+ for _ , b := range newLookup {
253+ b .Release ()
254+ }
255+ return nil , nil , err
256+ }
257+ return newLookup , unified , nil
258+ }
259+
260+ func concatDictIndices (mem memory.Allocator , data []arrow.ArrayData , idxType arrow.FixedWidthDataType , transpositions []* memory.Buffer ) (out * memory.Buffer , err error ) {
261+ defer func () {
262+ if err != nil && out != nil {
263+ out .Release ()
264+ out = nil
265+ }
266+ }()
267+
268+ idxWidth := idxType .BitWidth () / 8
269+ outLen := 0
270+ for i , d := range data {
271+ outLen += d .Len ()
272+ defer transpositions [i ].Release ()
273+ }
274+
275+ out = memory .NewResizableBuffer (mem )
276+ out .Resize (outLen * idxWidth )
277+
278+ outData := out .Bytes ()
279+ for i , d := range data {
280+ transposeMap := arrow .Int32Traits .CastFromBytes (transpositions [i ].Bytes ())
281+ src := d .Buffers ()[1 ].Bytes ()
282+ if d .Buffers ()[0 ] == nil {
283+ if err = utils .TransposeIntsBuffers (idxType , idxType , src , outData , d .Offset (), 0 , d .Len (), transposeMap ); err != nil {
284+ return
285+ }
286+ } else {
287+ rdr := bitutils .NewBitRunReader (d .Buffers ()[0 ].Bytes (), int64 (d .Offset ()), int64 (d .Len ()))
288+ pos := 0
289+ for {
290+ run := rdr .NextRun ()
291+ if run .Len == 0 {
292+ break
293+ }
294+
295+ if run .Set {
296+ err = utils .TransposeIntsBuffers (idxType , idxType , src , outData , d .Offset ()+ pos , pos , int (run .Len ), transposeMap )
297+ if err != nil {
298+ return
299+ }
300+ } else {
301+ memory .Set (outData [pos :pos + (int (run .Len )* idxWidth )], 0x00 )
302+ }
303+
304+ pos += int (run .Len )
305+ }
306+ }
307+ outData = outData [d .Len ()* idxWidth :]
308+ }
309+ return
310+ }
311+
231312// concat is the implementation for actually performing the concatenation of the arrow.ArrayData
232313// objects that we can call internally for nested types.
233314func concat (data []arrow.ArrayData , mem memory.Allocator ) (arrow.ArrayData , error ) {
@@ -258,6 +339,42 @@ func concat(data []arrow.ArrayData, mem memory.Allocator) (arrow.ArrayData, erro
258339 return nil , err
259340 }
260341 out .buffers [1 ] = bm
342+ case * arrow.DictionaryType :
343+ idxType := dt .IndexType .(arrow.FixedWidthDataType )
344+ // two cases: all dictionaries are the same or we need to unify them
345+ dictsSame := true
346+ dict0 := MakeFromData (data [0 ].Dictionary ())
347+ defer dict0 .Release ()
348+ for _ , d := range data {
349+ dict := MakeFromData (d .Dictionary ())
350+ if ! Equal (dict0 , dict ) {
351+ dict .Release ()
352+ dictsSame = false
353+ break
354+ }
355+ dict .Release ()
356+ }
357+
358+ indexBuffers := gatherBuffersFixedWidthType (data , 1 , idxType )
359+ if dictsSame {
360+ out .dictionary = dict0 .Data ().(* Data )
361+ out .dictionary .Retain ()
362+ out .buffers [1 ] = concatBuffers (indexBuffers , mem )
363+ break
364+ }
365+
366+ indexLookup , unifiedDict , err := unifyDictionaries (mem , data , dt )
367+ if err != nil {
368+ return nil , err
369+ }
370+ defer unifiedDict .Release ()
371+ out .dictionary = unifiedDict .Data ().(* Data )
372+ out .dictionary .Retain ()
373+
374+ out .buffers [1 ], err = concatDictIndices (mem , data , idxType , indexLookup )
375+ if err != nil {
376+ return nil , err
377+ }
261378 case arrow.FixedWidthDataType :
262379 out .buffers [1 ] = concatBuffers (gatherBuffersFixedWidthType (data , 1 , dt ), mem )
263380 case arrow.BinaryDataType :
0 commit comments