Skip to content

Commit 708f1b4

Browse files
committed
move stride to data, fix chunked slicing, remove intermediate binding and getters in favor of direct property accesses
1 parent 78ecc4c commit 708f1b4

15 files changed

Lines changed: 139 additions & 165 deletions

js/src/data.ts

Lines changed: 91 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,11 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
import { DataType } from './type';
1918
import { Vector } from './vector';
2019
import { popcnt_bit_range } from './util/bit';
2120
import { toArrayBufferView } from './util/buffer';
22-
import { VectorType as BufferType, UnionMode } from './enum';
21+
import { DataType, SparseUnion, DenseUnion } from './type';
22+
import { VectorType as BufferType, UnionMode, Type } from './enum';
2323
import {
2424
Dictionary,
2525
Null, Int, Float,
@@ -30,7 +30,7 @@ import {
3030

3131
// When slicing, we do not know the null count of the sliced range without
3232
// doing some computation. To avoid doing this eagerly, we set the null count
33-
// to -1 (any negative number will do). When Array::null_count is called the
33+
// to -1 (any negative number will do). When Vector.nullCount is called the
3434
// first time, the null count will be computed. See ARROW-33
3535
/** @ignore */ export type kUnknownNullCount = -1;
3636
/** @ignore */ export const kUnknownNullCount = -1;
@@ -42,12 +42,13 @@ import {
4242

4343
/** @ignore */
4444
export interface Buffers<T extends DataType> {
45-
[BufferType.OFFSET]?: Int32Array;
46-
[BufferType.DATA]?: T['TArray'];
47-
[BufferType.VALIDITY]?: Uint8Array;
48-
[BufferType.TYPE]?: T['TArray'];
45+
[BufferType.OFFSET]: Int32Array;
46+
[BufferType.DATA]: T['TArray'];
47+
[BufferType.VALIDITY]: Uint8Array;
48+
[BufferType.TYPE]: T['TArray'];
4949
}
5050

51+
/** @ignore */
5152
export interface Data<T extends DataType = DataType> {
5253
readonly TType: T['TType'];
5354
readonly TArray: T['TArray'];
@@ -57,86 +58,100 @@ export interface Data<T extends DataType = DataType> {
5758
/** @ignore */
5859
export class Data<T extends DataType = DataType> {
5960

60-
protected _type: T;
61-
protected _length: number;
62-
protected _offset: number;
63-
61+
public readonly type: T;
62+
public readonly length: number;
63+
public readonly offset: number;
64+
public readonly stride: number;
65+
public readonly childData: Data[];
66+
public readonly values: Buffers<T>[BufferType.DATA];
67+
public readonly typeIds: Buffers<T>[BufferType.TYPE];
6468
// @ts-ignore
65-
protected _childData: Data[];
66-
protected _buffers = [] as Buffers<T>;
67-
protected _nullCount: number | kUnknownNullCount;
68-
69-
public get type() { return this._type; }
70-
public get length() { return this._length; }
71-
public get offset() { return this._offset; }
72-
public get typeId() { return this._type.typeId; }
73-
public get childData() { return this._childData; }
69+
public readonly nullBitmap: Buffers<T>[BufferType.VALIDITY];
70+
// @ts-ignore
71+
public readonly valueOffsets: Buffers<T>[BufferType.OFFSET];
72+
73+
public get ArrayType() { return this.type.ArrayType; }
74+
public get typeId(): T['TType'] { return this.type.typeId; }
75+
public get buffers() {
76+
return [this.valueOffsets, this.values, this.nullBitmap, this.typeIds] as Buffers<T>;
77+
}
7478

75-
public get ArrayType() { return this._type.ArrayType; }
79+
protected _nullCount: number | kUnknownNullCount;
7680

77-
public get buffers() { return this._buffers; }
78-
public get values() { return this._buffers[BufferType.DATA]!; }
79-
public get typeIds() { return this._buffers[BufferType.TYPE]!; }
80-
public get nullBitmap() { return this._buffers[BufferType.VALIDITY]!; }
81-
public get valueOffsets() { return this._buffers[BufferType.OFFSET]!; }
8281
public get nullCount() {
8382
let nullCount = this._nullCount;
8483
let nullBitmap: Uint8Array | undefined;
85-
if (nullCount === kUnknownNullCount && (nullBitmap = this._buffers[BufferType.VALIDITY])) {
86-
this._nullCount = nullCount = this._length - popcnt_bit_range(nullBitmap, this._offset, this._offset + this._length);
84+
if (nullCount <= kUnknownNullCount && (nullBitmap = this.nullBitmap)) {
85+
this._nullCount = nullCount = this.length - popcnt_bit_range(nullBitmap, this.offset, this.offset + this.length);
8786
}
8887
return nullCount;
8988
}
9089

91-
constructor(type: T, offset: number, length: number, nullCount?: number, buffers?: Buffers<T>, childData?: (Data | Vector)[]) {
92-
this._type = type;
93-
this._offset = Math.floor(Math.max(offset || 0, 0));
94-
this._length = Math.floor(Math.max(length || 0, 0));
95-
this._buffers = Object.assign([], buffers) as Buffers<T>;
90+
constructor(type: T, offset: number, length: number, nullCount?: number, buffers?: Partial<Buffers<T>> | Data<T>, childData?: (Data | Vector)[]) {
91+
this.type = type;
92+
this.offset = Math.floor(Math.max(offset || 0, 0));
93+
this.length = Math.floor(Math.max(length || 0, 0));
9694
this._nullCount = Math.floor(Math.max(nullCount || 0, -1));
97-
this._childData = (childData || []).map((x) => x instanceof Data ? x : x.data) as Data[];
95+
this.childData = (childData || []).map((x) => x instanceof Data ? x : x.data) as Data[];
96+
let buffer: Buffers<T>[keyof Buffers<T>];
97+
if (buffers instanceof Data) {
98+
this.stride = buffers.stride;
99+
this.values = buffers.values;
100+
this.typeIds = buffers.typeIds;
101+
this.nullBitmap = buffers.nullBitmap;
102+
this.valueOffsets = buffers.valueOffsets;
103+
} else {
104+
if (buffers) {
105+
(buffer = (buffers as Buffers<T>)[0]) && (this.valueOffsets = buffer);
106+
(buffer = (buffers as Buffers<T>)[1]) && (this.values = buffer);
107+
(buffer = (buffers as Buffers<T>)[2]) && (this.nullBitmap = buffer);
108+
(buffer = (buffers as Buffers<T>)[3]) && (this.typeIds = buffer);
109+
}
110+
const t: any = type;
111+
switch (type.typeId) {
112+
case Type.Decimal: this.stride = 4; break;
113+
case Type.Timestamp: this.stride = 2; break;
114+
case Type.Date: this.stride = 1 + (t as Date_).unit; break;
115+
case Type.Interval: this.stride = 1 + (t as Interval).unit; break;
116+
case Type.Int: this.stride = 1 + +((t as Int).bitWidth > 32); break;
117+
case Type.Time: this.stride = 1 + +((t as Time).bitWidth > 32); break;
118+
case Type.FixedSizeList: this.stride = (t as FixedSizeList).listSize; break;
119+
case Type.FixedSizeBinary: this.stride = (t as FixedSizeBinary).byteWidth; break;
120+
default: this.stride = 1;
121+
}
122+
}
98123
}
99124

100-
public clone<R extends DataType>(type: R, offset = this._offset, length = this._length, nullCount = this._nullCount, buffers: Buffers<R> = <any> this._buffers, childData: (Data | Vector)[] = this._childData) {
125+
public clone<R extends DataType>(type: R, offset = this.offset, length = this.length, nullCount = this._nullCount, buffers: Buffers<R> = <any> this, childData: (Data | Vector)[] = this.childData) {
101126
return new Data(type, offset, length, nullCount, buffers, childData);
102127
}
103128

104129
public slice(offset: number, length: number): Data<T> {
105130
// +true === 1, +false === 0, so this means
106131
// we keep nullCount at 0 if it's already 0,
107132
// otherwise set to the invalidated flag -1
133+
const { stride, typeId, childData } = this;
108134
const nullCount = +(this._nullCount === 0) - 1;
109-
const buffers = this.sliceBuffers(offset, length);
110-
const childData = this.sliceChildren(offset, length);
111-
return this.clone<T>(this._type, this._offset + offset, length, nullCount, buffers, childData);
135+
const childStride = typeId === 16 /* FixedSizeList */ ? stride : 1;
136+
const buffers = this._sliceBuffers(offset, length, stride, typeId);
137+
return this.clone<T>(this.type, this.offset + offset, length, nullCount, buffers,
138+
// Don't slice children if we have value offsets (the variable-width types)
139+
(!childData.length || this.valueOffsets) ? childData : this._sliceChildren(childData, childStride * offset, childStride * length));
112140
}
113141

114-
protected sliceBuffers(offset: number, length: number): Buffers<T> {
115-
let arr: any, buffers = Object.assign([], this._buffers) as Buffers<T>;
142+
protected _sliceBuffers(offset: number, length: number, stride: number, typeId: T['TType']): Buffers<T> {
143+
let arr: any, { buffers } = this;
116144
// If typeIds exist, slice the typeIds buffer
117-
(arr = buffers[BufferType.TYPE]) && (buffers[BufferType.TYPE] = this.sliceData(arr, offset, length));
145+
(arr = buffers[BufferType.TYPE]) && (buffers[BufferType.TYPE] = arr.subarray(offset, offset + length));
118146
// If offsets exist, only slice the offsets buffer
119-
(arr = buffers[BufferType.OFFSET]) && (buffers[BufferType.OFFSET] = this.sliceOffsets(arr, offset, length)) ||
120-
// Otherwise if no offsets, slice the data buffer
121-
(arr = buffers[BufferType.DATA]) && (buffers[BufferType.DATA] = this.sliceData(arr, offset, length));
147+
(arr = buffers[BufferType.OFFSET]) && (buffers[BufferType.OFFSET] = arr.subarray(offset, offset + length + 1)) ||
148+
// Otherwise if no offsets, slice the data buffer. Don't slice the data vector for Booleans, since the offset goes by bits not bytes
149+
(arr = buffers[BufferType.DATA]) && (buffers[BufferType.DATA] = typeId === 6 ? arr : arr.subarray(stride * offset, stride * (offset + length)));
122150
return buffers;
123151
}
124152

125-
protected sliceChildren(offset: number, length: number): Data[] {
126-
// Only slice children if this isn't variable width data
127-
if (!this._buffers[BufferType.OFFSET]) {
128-
return this._childData.map((child) => child.slice(offset, length));
129-
}
130-
return this._childData;
131-
}
132-
133-
protected sliceData(data: T['TArray'] & ArrayBufferView, offset: number, length: number) {
134-
// Don't slice the data vector for Booleans, since the offset goes by bits not bytes
135-
return this._type.typeId === 6 ? data : data.subarray(offset, offset + length);
136-
}
137-
138-
protected sliceOffsets(valueOffsets: Int32Array, offset: number, length: number) {
139-
return valueOffsets.subarray(offset, offset + length + 1);
153+
protected _sliceChildren(childData: Data[], offset: number, length: number): Data[] {
154+
return childData.map((child) => child.slice(offset, length));
140155
}
141156

142157
//
@@ -235,40 +250,44 @@ export class Data<T extends DataType = DataType> {
235250
});
236251
}
237252
/** @nocollapse */
238-
public static List<T extends List>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, childData: Data | Vector) {
253+
public static List<T extends List>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, child: Data<T['valueType']> | Vector<T['valueType']>) {
239254
return new Data(type, offset, length, nullCount, {
240255
[BufferType.VALIDITY]: toArrayBufferView(Uint8Array, nullBitmap),
241256
[BufferType.OFFSET]: toArrayBufferView(Int32Array, valueOffsets)
242-
}, [childData]);
257+
}, [child]);
243258
}
244259
/** @nocollapse */
245-
public static FixedSizeList<T extends FixedSizeList>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, childData: Data | Vector) {
260+
public static FixedSizeList<T extends FixedSizeList>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, child: Data | Vector) {
246261
return new Data(type, offset, length, nullCount, {
247262
[BufferType.VALIDITY]: toArrayBufferView(Uint8Array, nullBitmap)
248-
}, [childData]);
263+
}, [child]);
249264
}
250265
/** @nocollapse */
251-
public static Struct<T extends Struct>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, childData: (Data | Vector)[]) {
266+
public static Struct<T extends Struct>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, children: (Data | Vector)[]) {
252267
return new Data(type, offset, length, nullCount, {
253268
[BufferType.VALIDITY]: toArrayBufferView(Uint8Array, nullBitmap)
254-
}, childData);
269+
}, children);
255270
}
256271
/** @nocollapse */
257-
public static Map<T extends Map_>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, childData: (Data | Vector)[]) {
272+
public static Map<T extends Map_>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, children: (Data | Vector)[]) {
258273
return new Data(type, offset, length, nullCount, {
259274
[BufferType.VALIDITY]: toArrayBufferView(Uint8Array, nullBitmap)
260-
}, childData);
275+
}, children);
261276
}
277+
public static Union<T extends SparseUnion>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, children: (Data | Vector)[]): Data<T>;
278+
public static Union<T extends DenseUnion>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, valueOffsets: ValueOffsetsBuffer, children: (Data | Vector)[]): Data<T>;
262279
/** @nocollapse */
263-
public static Union<T extends Union>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, valueOffsetsOrChildData: ValueOffsetsBuffer | (Data | Vector)[], childData?: (Data | Vector)[]) {
280+
public static Union<T extends Union>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, valueOffsetsOrChildren: ValueOffsetsBuffer | (Data | Vector)[], children?: (Data | Vector)[]) {
264281
const buffers = {
265282
[BufferType.VALIDITY]: toArrayBufferView(Uint8Array, nullBitmap),
266283
[BufferType.TYPE]: toArrayBufferView(type.ArrayType, typeIds)
267-
} as any;
284+
} as Partial<Buffers<T>>;
268285
if (type.mode === UnionMode.Sparse) {
269-
return new Data(type, offset, length, nullCount, buffers, valueOffsetsOrChildData as (Data | Vector)[]);
286+
return new Data(type, offset, length, nullCount, buffers, valueOffsetsOrChildren as (Data | Vector)[]);
270287
}
271-
buffers[BufferType.OFFSET] = toArrayBufferView(Int32Array, <ValueOffsetsBuffer> valueOffsetsOrChildData);
272-
return new Data(type, offset, length, nullCount, buffers, childData);
288+
buffers[BufferType.OFFSET] = toArrayBufferView(Int32Array, <ValueOffsetsBuffer> valueOffsetsOrChildren);
289+
return new Data(type, offset, length, nullCount, buffers, children);
273290
}
274291
}
292+
293+
((Data.prototype as any).childData = Object.freeze([]));

js/src/recordbatch.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ export class RecordBatch<T extends { [key: string]: DataType } = any>
8181
const fields = this._schema.fields;
8282
const schema = this._schema.select(...columnNames);
8383
const childNames = columnNames.reduce((xs, x) => (xs[x] = true) && xs, <any> {});
84-
const childData = this._data.childData.filter((_, i) => childNames[fields[i].name]);
84+
const childData = this.data.childData.filter((_, i) => childNames[fields[i].name]);
8585
const structData = Data.Struct(new Struct(schema.fields), 0, this.length, 0, null, childData);
8686
return new RecordBatch<{ [P in K]: T[P] }>(schema, structData as Data<Struct<{ [P in K]: T[P] }>>);
8787
}

js/src/vector/base.ts

Lines changed: 24 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -26,48 +26,43 @@ import { Clonable, Sliceable, Applicative } from '../vector';
2626
export interface BaseVector<T extends DataType = any> extends Clonable<VType<T>>, Sliceable<VType<T>>, Applicative<T, Chunked<T>> {
2727
slice(begin?: number, end?: number): VType<T>;
2828
concat(...others: Vector<T>[]): Chunked<T>;
29-
clone<R extends DataType = T>(data: Data<R>, children?: Vector<R>[], stride?: number): VType<R>;
29+
clone<R extends DataType = T>(data: Data<R>, children?: Vector<R>[]): VType<R>;
3030
}
3131

3232
export abstract class BaseVector<T extends DataType = any> extends Vector<T>
3333
implements Clonable<VType<T>>, Sliceable<VType<T>>, Applicative<T, Chunked<T>> {
3434

35-
// @ts-ignore
36-
protected _data: Data<T>;
37-
protected _stride: number = 1;
38-
protected _numChildren: number = 0;
3935
protected _children?: Vector[];
4036

41-
constructor(data: Data<T>, children?: Vector[], stride?: number) {
37+
constructor(data: Data<T>, children?: Vector[]) {
4238
super();
4339
this._children = children;
44-
this._numChildren = data.childData.length;
45-
this._bindDataAccessors(this._data = data);
46-
this._stride = Math.floor(Math.max(stride || 1, 1));
40+
this.numChildren = data.childData.length;
41+
this._bindDataAccessors(this.data = data);
4742
}
4843

49-
public get data() { return this._data; }
50-
public get stride() { return this._stride; }
51-
public get numChildren() { return this._numChildren; }
44+
public readonly data: Data<T>;
45+
public readonly numChildren: number;
5246

53-
public get type() { return this._data.type; }
54-
public get typeId() { return this._data.typeId as T['TType']; }
55-
public get length() { return this._data.length; }
56-
public get offset() { return this._data.offset; }
57-
public get nullCount() { return this._data.nullCount; }
47+
public get type() { return this.data.type; }
48+
public get typeId() { return this.data.typeId; }
49+
public get length() { return this.data.length; }
50+
public get offset() { return this.data.offset; }
51+
public get stride() { return this.data.stride; }
52+
public get nullCount() { return this.data.nullCount; }
5853
public get VectorName() { return this.constructor.name; }
5954

60-
public get ArrayType(): T['ArrayType'] { return this._data.ArrayType; }
55+
public get ArrayType(): T['ArrayType'] { return this.data.ArrayType; }
6156

62-
public get values() { return this._data.values; }
63-
public get typeIds() { return this._data.typeIds; }
64-
public get nullBitmap() { return this._data.nullBitmap; }
65-
public get valueOffsets() { return this._data.valueOffsets; }
57+
public get values() { return this.data.values; }
58+
public get typeIds() { return this.data.typeIds; }
59+
public get nullBitmap() { return this.data.nullBitmap; }
60+
public get valueOffsets() { return this.data.valueOffsets; }
6661

6762
public get [Symbol.toStringTag]() { return `${this.VectorName}<${this.type[Symbol.toStringTag]}>`; }
6863

69-
public clone<R extends DataType = T>(data: Data<R>, children = this._children, stride = this._stride) {
70-
return Vector.new<R>(data, children, stride) as any;
64+
public clone<R extends DataType = T>(data: Data<R>, children = this._children) {
65+
return Vector.new<R>(data, children) as any;
7166
}
7267

7368
public concat(...others: Vector<T>[]) {
@@ -94,20 +89,21 @@ export abstract class BaseVector<T extends DataType = any> extends Vector<T>
9489
public getChildAt<R extends DataType = any>(index: number): Vector<R> | null {
9590
return index < 0 || index >= this.numChildren ? null : (
9691
(this._children || (this._children = []))[index] ||
97-
(this._children[index] = Vector.new<R>(this._data.childData[index] as Data<R>))
92+
(this._children[index] = Vector.new<R>(this.data.childData[index] as Data<R>))
9893
) as Vector<R>;
9994
}
10095

10196
// @ts-ignore
10297
public toJSON(): any { return [...this]; }
10398

104-
protected _sliceInternal(self: this, offset: number, length: number) {
105-
const stride = self.stride;
106-
return self.clone(self.data.slice(offset * stride, (length - offset) * stride));
99+
protected _sliceInternal(self: this, begin: number, end: number) {
100+
return self.clone(self.data.slice(begin, end - begin));
107101
}
108102

109103
// @ts-ignore
110104
protected _bindDataAccessors(data: Data<T>) {
111105
// Implementation in src/vector/index.ts due to circular dependency/packaging shenanigans
112106
}
113107
}
108+
109+
(BaseVector.prototype as any)[Symbol.isConcatSpreadable] = true;

0 commit comments

Comments
 (0)