Skip to content

Commit 199d3cf

Browse files
jorisvandenbossche authored and wesm committed
ARROW-6158: [C++/Python] Validate child array types with type fields of StructArray
https://issues.apache.org/jira/browse/ARROW-6158

Closes apache#5488 from jorisvandenbossche/ARROW-6158-struct-array-validation and squashes the following commits:

7573781 <Joris Van den Bossche> ARROW-6158: Validate child array types with type fields of StructArray

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Wes McKinney <wesm+git@apache.org>
1 parent 232cde0 commit 199d3cf

3 files changed

Lines changed: 31 additions & 8 deletions

File tree

cpp/src/arrow/array.cc

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1234,6 +1234,7 @@ struct ValidateVisitor {
12341234
}
12351235

12361236
Status Visit(const StructArray& array) {
1237+
const auto& struct_type = checked_cast<const StructType&>(*array.type());
12371238
if (array.num_fields() > 0) {
12381239
// Validate fields
12391240
int64_t array_length = array.field(0)->length();
@@ -1245,10 +1246,17 @@ struct ValidateVisitor {
12451246
it->type()->ToString(), " at position [", idx, "]");
12461247
}
12471248

1249+
auto it_type = struct_type.child(i)->type();
1250+
if (!it->type()->Equals(it_type)) {
1251+
return Status::Invalid("Child array at position [", idx,
1252+
"] does not match type field: ", it->type()->ToString(),
1253+
" vs ", it_type->ToString());
1254+
}
1255+
12481256
const Status child_valid = it->Validate();
12491257
if (!child_valid.ok()) {
12501258
return Status::Invalid("Child array invalid: ", child_valid.ToString(),
1251-
" at position [", idx, "}");
1259+
" at position [", idx, "]");
12521260
}
12531261
++idx;
12541262
}

python/pyarrow/array.pxi

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1288,7 +1288,9 @@ cdef class UnionArray(Array):
12881288
check_status(CUnionArray.MakeDense(
12891289
deref(types.ap), deref(value_offsets.ap), c, c_field_names,
12901290
c_type_codes, &out))
1291-
return pyarrow_wrap_array(out)
1291+
cdef Array result = pyarrow_wrap_array(out)
1292+
result.validate()
1293+
return result
12921294

12931295
@staticmethod
12941296
def from_sparse(Array types, list children, list field_names=None,
@@ -1326,7 +1328,9 @@ cdef class UnionArray(Array):
13261328
c_field_names,
13271329
c_type_codes,
13281330
&out))
1329-
return pyarrow_wrap_array(out)
1331+
cdef Array result = pyarrow_wrap_array(out)
1332+
result.validate()
1333+
return result
13301334

13311335

13321336
cdef class StringArray(Array):
@@ -1501,7 +1505,9 @@ cdef class DictionaryArray(Array):
15011505
c_result.reset(new CDictionaryArray(c_type, _indices.sp_array,
15021506
_dictionary.sp_array))
15031507

1504-
return pyarrow_wrap_array(c_result)
1508+
cdef Array result = pyarrow_wrap_array(c_result)
1509+
result.validate()
1510+
return result
15051511

15061512

15071513
cdef class StructArray(Array):
@@ -1626,7 +1632,9 @@ cdef class StructArray(Array):
16261632
else:
16271633
c_result = CStructArray.MakeFromFields(
16281634
c_arrays, c_fields, shared_ptr[CBuffer](), -1, 0)
1629-
return pyarrow_wrap_array(GetResultValue(c_result))
1635+
cdef Array result = pyarrow_wrap_array(GetResultValue(c_result))
1636+
result.validate()
1637+
return result
16301638

16311639

16321640
cdef class ExtensionArray(Array):
@@ -1665,7 +1673,9 @@ cdef class ExtensionArray(Array):
16651673
"for extension type {1}".format(storage.type, typ))
16661674

16671675
ext_array = make_shared[CExtensionArray](typ.sp_type, storage.sp_array)
1668-
return pyarrow_wrap_array(<shared_ptr[CArray]> ext_array)
1676+
cdef Array result = pyarrow_wrap_array(<shared_ptr[CArray]> ext_array)
1677+
result.validate()
1678+
return result
16691679

16701680

16711681
cdef dict _array_classes = {

python/pyarrow/tests/test_array.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -422,7 +422,7 @@ def test_struct_from_buffers():
422422

423423

424424
def test_struct_from_arrays():
425-
a = pa.array([4, 5, 6])
425+
a = pa.array([4, 5, 6], type=pa.int64())
426426
b = pa.array(["bar", None, ""])
427427
c = pa.array([[1, 2], None, [3, None]])
428428
expected_list = [
@@ -447,7 +447,7 @@ def test_struct_from_arrays():
447447
# From fields
448448
fa = pa.field("a", a.type, nullable=False)
449449
fb = pa.field("b", b.type)
450-
fc = pa.field("c", b.type)
450+
fc = pa.field("c", c.type)
451451
arr = pa.StructArray.from_arrays([a, b, c], fields=[fa, fb, fc])
452452
assert arr.type == pa.struct([fa, fb, fc])
453453
assert not arr.type[0].nullable
@@ -460,6 +460,11 @@ def test_struct_from_arrays():
460460
assert arr.type == pa.struct([])
461461
assert arr.to_pylist() == []
462462

463+
# Inconsistent fields
464+
fa2 = pa.field("a", pa.int32())
465+
with pytest.raises(ValueError, match="int64 vs int32"):
466+
pa.StructArray.from_arrays([a, b, c], fields=[fa2, fb, fc])
467+
463468

464469
def test_dictionary_from_numpy():
465470
indices = np.repeat([0, 1, 2], 2)

0 commit comments

Comments
 (0)