forked from apache/arrow
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patharray.pyx
More file actions
192 lines (130 loc) · 4.28 KB
/
Copy patharray.pyx
File metadata and controls
192 lines (130 loc) · 4.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# cython: profile=False
# distutils: language = c++
# cython: embedsignature = True
from pyarrow.includes.libarrow cimport *
cimport pyarrow.includes.pyarrow as pyarrow
from pyarrow.compat import frombytes, tobytes
from pyarrow.error cimport check_status
cimport pyarrow.scalar as scalar
from pyarrow.scalar import NA
def total_allocated_bytes():
    """
    Return the number of bytes currently reported as allocated by the
    memory pool obtained from ``pyarrow.GetMemoryPool()``.
    """
    # Query the pool directly; no need to hold the pointer in a local.
    return pyarrow.GetMemoryPool().bytes_allocated()
cdef class Array:
    """
    Python-visible wrapper around a ``shared_ptr[CArray]``.

    Instances are populated through ``init`` (see ``box_arrow_array``), which
    stores the shared pointer, caches the raw pointer and wraps the array's
    data type in a ``DataType`` object.
    """

    cdef init(self, const shared_ptr[CArray]& sp_array):
        # Hold the shared_ptr itself and also cache the raw pointer it
        # manages in ``self.ap``.
        self.sp_array = sp_array
        self.ap = sp_array.get()
        # Wrap the C++ data type so it is reachable from Python as ``type``.
        self.type = DataType()
        self.type.init(self.sp_array.get().type())

    property null_count:
        """Null count as reported by the underlying C++ array."""

        def __get__(self):
            return self.sp_array.get().null_count()

    def __iter__(self):
        """Yield each element in order, boxed via ``getitem``."""
        # The generator finishes by simply returning. Explicitly raising
        # StopIteration inside a generator is converted to RuntimeError
        # under PEP 479, so it must not be raised here.
        for i in range(len(self)):
            yield self.getitem(i)

    def __repr__(self):
        """Return the default object repr followed by a formatted preview."""
        # Imported lazily inside the method, as in the original code.
        from pyarrow.formatting import array_format
        type_format = object.__repr__(self)
        values = array_format(self, window=10)
        return '{0}\n{1}'.format(type_format, values)

    def __len__(self):
        """Return the length reported by the underlying C++ array."""
        return self.sp_array.get().length()

    def isnull(self):
        """
        Not implemented.

        Raises
        ------
        NotImplementedError
            Always.
        """
        # ``NotImplemented`` is a comparison sentinel, not an exception;
        # raising it produces a misleading TypeError.
        raise NotImplementedError

    def __getitem__(self, key):
        """
        Return the element at integer index ``key``, or delegate to
        ``slice`` when ``key`` is a slice object.

        Negative integer indices count from the end of the array.

        Raises
        ------
        IndexError
            If an integer index falls outside ``[-len(self), len(self))``.
        NotImplementedError
            If a slice has a step other than 1.
        ValueError
            If a slice has a step of zero.
        """
        cdef:
            Py_ssize_t n = len(self)

        if PySlice_Check(key):
            # slice.indices resolves None/negative/out-of-range bounds the
            # same way built-in sequences do, and cannot loop forever on an
            # empty array the way repeated ``+= n`` does when n == 0.
            start, stop, step = key.indices(n)
            if step != 1:
                raise NotImplementedError
            return self.slice(start, stop)

        index = key
        if index < 0:
            index += n
        # Reject out-of-range positions before they reach the C++ accessor.
        if index < 0 or index >= n:
            raise IndexError('index {0} is out of bounds for array of '
                             'length {1}'.format(key, n))
        return self.getitem(index)

    cdef getitem(self, int i):
        # Box element ``i`` as a Python-level scalar; no bounds checking is
        # performed here, callers are expected to pass a valid position.
        return scalar.box_arrow_scalar(self.type, self.sp_array, i)

    def slice(self, start, end):
        """
        Placeholder for slicing; currently has no implementation and
        returns ``None``.
        """
        # TODO: implement slicing of the underlying array.
        pass
# Empty Array subclasses. None of them adds attributes or methods; each one
# only provides a distinct Python type on top of Array (or NumericArray).
# NullArray, BooleanArray, Int64Array, DoubleArray, ListArray and StringArray
# are the ones referenced by the _array_classes lookup table in this file.

# Registered for LogicalType_NA in _array_classes.
cdef class NullArray(Array):
    pass

# Registered for LogicalType_BOOL in _array_classes.
cdef class BooleanArray(Array):
    pass

# Common base class for the integer and floating point array types below.
cdef class NumericArray(Array):
    pass

cdef class Int8Array(NumericArray):
    pass

cdef class UInt8Array(NumericArray):
    pass

cdef class Int16Array(NumericArray):
    pass

cdef class UInt16Array(NumericArray):
    pass

cdef class Int32Array(NumericArray):
    pass

cdef class UInt32Array(NumericArray):
    pass

# Registered for LogicalType_INT64 in _array_classes.
cdef class Int64Array(NumericArray):
    pass

cdef class UInt64Array(NumericArray):
    pass

cdef class FloatArray(NumericArray):
    pass

# Registered for LogicalType_DOUBLE in _array_classes.
cdef class DoubleArray(NumericArray):
    pass

# Registered for LogicalType_LIST in _array_classes.
cdef class ListArray(Array):
    pass

# Registered for LogicalType_STRING in _array_classes.
cdef class StringArray(Array):
    pass
# Lookup table from a logical type enum value to the Array subclass that
# box_arrow_array instantiates for arrays of that type.
# NOTE(review): only six logical types are registered here; any other type
# id makes the dict lookup in box_arrow_array raise KeyError. Presumably the
# remaining numeric wrapper classes are meant to be added later - confirm.
cdef dict _array_classes = {
    LogicalType_NA: NullArray,
    LogicalType_BOOL: BooleanArray,
    LogicalType_INT64: Int64Array,
    LogicalType_DOUBLE: DoubleArray,
    LogicalType_LIST: ListArray,
    LogicalType_STRING: StringArray,
}
cdef object box_arrow_array(const shared_ptr[CArray]& sp_array):
    """
    Wrap a C++ array in the Array subclass registered for its logical type.

    Raises ValueError when the shared pointer or the array's data type is
    NULL, and KeyError when the type id has no entry in _array_classes.
    """
    cdef:
        CArray* raw_array = sp_array.get()
        CDataType* dtype
        Array boxed

    # Guard against an empty shared_ptr before dereferencing it.
    if raw_array == NULL:
        raise ValueError('Array was NULL')

    dtype = raw_array.type().get()
    if dtype == NULL:
        raise ValueError('Array data type was NULL')

    # Pick the wrapper class by type id, build an empty instance and then
    # attach the C++ array to it.
    wrapper_cls = _array_classes[dtype.type]
    boxed = wrapper_cls()
    boxed.init(sp_array)
    return boxed
def from_pylist(object list_obj, DataType type=None):
    """
    Convert Python list to Arrow array

    Parameters
    ----------
    list_obj : object
        The Python object handed to ``pyarrow.ConvertPySequence``.
    type : DataType, optional
        Must be left as None; passing an explicit type is not supported yet.

    Returns
    -------
    The wrapper object produced by ``box_arrow_array`` for the converted
    array.

    Raises
    ------
    NotImplementedError
        If ``type`` is not None.
    """
    cdef shared_ptr[CArray] sp_array

    # Conversion to an explicitly requested type is not available.
    if type is not None:
        raise NotImplementedError

    # check_status receives the status returned by the conversion call.
    check_status(pyarrow.ConvertPySequence(list_obj, &sp_array))
    return box_arrow_array(sp_array)