Skip to content

Commit 06b1013

Browse files
zixi-bwang authored and eerhardt committed
ARROW-15071: [C#] Fixed a bug in Column.cs ValidateArrayDataTypes method
Fixed a bug in the ValidateArrayDataTypes method of Column.cs. From: `if (Data.Array(i).Data.DataType != Field.DataType)`. To: `if (Data.Array(i).Data.DataType.TypeId != Field.DataType.TypeId)`. Added unit test in TestTableBasics and others. Closes apache#11931 from zixi-bwang/CSharpUnitTesting. Lead-authored-by: Zixi <zixi.bwang@gmail.com>. Co-authored-by: Zixi <89567557+zixi-bwang@users.noreply.github.com>. Signed-off-by: Eric Erhardt <eric.erhardt@microsoft.com>.
1 parent 968e6ea commit 06b1013

6 files changed

Lines changed: 279 additions & 1 deletion

File tree

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one or more
2+
// contributor license agreements. See the NOTICE file distributed with
3+
// this work for additional information regarding copyright ownership.
4+
// The ASF licenses this file to You under the Apache License, Version 2.0
5+
// (the "License"); you may not use this file except in compliance with
6+
// the License. You may obtain a copy of the License at
7+
//
8+
// http://www.apache.org/licenses/LICENSE-2.0
9+
//
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS,
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
16+
using System;
17+
using Apache.Arrow.Types;
18+
19+
namespace Apache.Arrow
20+
{
21+
/// <summary>
/// Arrow type visitor that checks whether a visited ("actual") data type matches
/// the expected data type supplied at construction time.
/// </summary>
/// <remarks>
/// Parameterized types (timestamp, date, time, fixed-size binary, list, struct) are
/// compared on their parameters; every other type is compared on <c>TypeId</c> only.
/// <see cref="DataTypeMatch"/> always reflects the outcome of the most recent
/// <c>Visit</c> call, so a single instance can safely be reused to check several
/// types in sequence (a stale <c>true</c> from an earlier visit is overwritten).
/// </remarks>
internal sealed class ArrayDataTypeComparer :
    IArrowTypeVisitor<TimestampType>,
    IArrowTypeVisitor<Date32Type>,
    IArrowTypeVisitor<Date64Type>,
    IArrowTypeVisitor<Time32Type>,
    IArrowTypeVisitor<Time64Type>,
    IArrowTypeVisitor<FixedSizeBinaryType>,
    IArrowTypeVisitor<ListType>,
    IArrowTypeVisitor<StructType>
{
    private readonly IArrowType _expectedType;
    private bool _dataTypeMatch;

    /// <summary>Creates a comparer for the given expected type.</summary>
    /// <param name="expectedType">The type every visited type is compared against.</param>
    public ArrayDataTypeComparer(IArrowType expectedType)
    {
        _expectedType = expectedType;
    }

    /// <summary>
    /// Result of the most recent <c>Visit</c> call; <c>false</c> if nothing has been visited yet.
    /// </summary>
    public bool DataTypeMatch => _dataTypeMatch;

    /// <summary>Matches when the expected type is a timestamp with the same timezone and unit.</summary>
    public void Visit(TimestampType actualType)
    {
        // Assign (rather than only set to true) so a reused comparer cannot
        // report a match left over from a previous visit.
        _dataTypeMatch = _expectedType is TimestampType expectedType
            && expectedType.Timezone == actualType.Timezone
            && expectedType.Unit == actualType.Unit;
    }

    /// <summary>Matches when the expected type is a Date32 with the same unit.</summary>
    public void Visit(Date32Type actualType)
    {
        _dataTypeMatch = _expectedType is Date32Type expectedType
            && expectedType.Unit == actualType.Unit;
    }

    /// <summary>Matches when the expected type is a Date64 with the same unit.</summary>
    public void Visit(Date64Type actualType)
    {
        _dataTypeMatch = _expectedType is Date64Type expectedType
            && expectedType.Unit == actualType.Unit;
    }

    /// <summary>Matches when the expected type is a Time32 with the same unit.</summary>
    public void Visit(Time32Type actualType)
    {
        _dataTypeMatch = _expectedType is Time32Type expectedType
            && expectedType.Unit == actualType.Unit;
    }

    /// <summary>Matches when the expected type is a Time64 with the same unit.</summary>
    public void Visit(Time64Type actualType)
    {
        _dataTypeMatch = _expectedType is Time64Type expectedType
            && expectedType.Unit == actualType.Unit;
    }

    /// <summary>Matches when the expected type is a fixed-size binary with the same byte width.</summary>
    public void Visit(FixedSizeBinaryType actualType)
    {
        _dataTypeMatch = _expectedType is FixedSizeBinaryType expectedType
            && expectedType.ByteWidth == actualType.ByteWidth;
    }

    /// <summary>Matches when the expected type is a list whose child fields all match.</summary>
    public void Visit(ListType actualType)
    {
        _dataTypeMatch = _expectedType is ListType expectedType
            && CompareNested(expectedType, actualType);
    }

    /// <summary>Matches when the expected type is a struct whose child fields all match.</summary>
    public void Visit(StructType actualType)
    {
        _dataTypeMatch = _expectedType is StructType expectedType
            && CompareNested(expectedType, actualType);
    }

    /// <summary>
    /// Compares the child fields of two nested types position by position.
    /// </summary>
    /// <returns>
    /// <c>true</c> when both types have the same number of fields and every pair of
    /// fields at the same index is equal according to <c>FieldComparer.Compare</c>.
    /// </returns>
    private static bool CompareNested(NestedType expectedType, NestedType actualType)
    {
        if (expectedType.Fields.Count != actualType.Fields.Count)
        {
            return false;
        }

        for (int i = 0; i < expectedType.Fields.Count; i++)
        {
            // Cheap early-out on the type id before the full field comparison.
            if (expectedType.Fields[i].DataType.TypeId != actualType.Fields[i].DataType.TypeId)
            {
                return false;
            }

            if (!FieldComparer.Compare(expectedType.Fields[i], actualType.Fields[i]))
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>Fallback for all other types: matches when the type ids are equal.</summary>
    public void Visit(IArrowType actualType)
    {
        _dataTypeMatch = _expectedType.TypeId == actualType.TypeId;
    }
}
147+
}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one or more
2+
// contributor license agreements. See the NOTICE file distributed with
3+
// this work for additional information regarding copyright ownership.
4+
// The ASF licenses this file to You under the Apache License, Version 2.0
5+
// (the "License"); you may not use this file except in compliance with
6+
// the License. You may obtain a copy of the License at
7+
//
8+
// http://www.apache.org/licenses/LICENSE-2.0
9+
//
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS,
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
16+
using System.Linq;
17+
18+
namespace Apache.Arrow
19+
{
20+
/// <summary>
/// Structural equality check for two <c>Field</c> instances.
/// </summary>
internal static class FieldComparer
{
    /// <summary>
    /// Determines whether two fields are equivalent: same name, nullability,
    /// metadata entries and data type (as judged by <c>ArrayDataTypeComparer</c>).
    /// </summary>
    /// <param name="expected">The reference field.</param>
    /// <param name="actual">The field being checked against <paramref name="expected"/>.</param>
    /// <returns>
    /// <c>true</c> when both arguments are the same reference (including both
    /// <c>null</c>) or are structurally equal; <c>false</c> otherwise, including
    /// when exactly one of them is <c>null</c>.
    /// </returns>
    public static bool Compare(Field expected, Field actual)
    {
        if (ReferenceEquals(expected, actual))
        {
            return true;
        }

        // Exactly one side is null here (both-null was handled above).
        if (expected is null || actual is null)
        {
            return false;
        }

        if (expected.Name != actual.Name
            || expected.IsNullable != actual.IsNullable
            || expected.HasMetadata != actual.HasMetadata)
        {
            return false;
        }

        if (expected.HasMetadata)
        {
            if (expected.Metadata.Count != actual.Metadata.Count)
            {
                return false;
            }

            // Counts are equal, so checking every expected entry against the
            // actual metadata is sufficient. TryGetValue avoids a second lookup.
            foreach (var entry in expected.Metadata)
            {
                if (!actual.Metadata.TryGetValue(entry.Key, out string actualValue)
                    || entry.Value != actualValue)
                {
                    return false;
                }
            }
        }

        // A fresh comparer per call keeps nested comparisons independent.
        var dataTypeComparer = new ArrayDataTypeComparer(expected.DataType);
        actual.DataType.Accept(dataTypeComparer);

        return dataTypeComparer.DataTypeMatch;
    }
}
60+
}

csharp/src/Apache.Arrow/Column.cs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,18 @@ public Column Slice(int offset)
6060

6161
private bool ValidateArrayDataTypes()
6262
{
63+
var dataTypeComparer = new ArrayDataTypeComparer(Field.DataType);
64+
6365
for (int i = 0; i < Data.ArrayCount; i++)
6466
{
65-
if (Data.Array(i).Data.DataType != Field.DataType)
67+
if (Data.Array(i).Data.DataType.TypeId != Field.DataType.TypeId)
68+
{
69+
return false;
70+
}
71+
72+
Data.Array(i).Data.DataType.Accept(dataTypeComparer);
73+
74+
if (!dataTypeComparer.DataTypeMatch)
6675
{
6776
return false;
6877
}

csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,24 @@ public void ListArrayBuilder()
101101
new List<string> { "444", null, "555", "666" },
102102
ConvertStringArrayToList(list.GetSlicedValues(3) as StringArray));
103103

104+
Assert.Throws<ArgumentOutOfRangeException>(() => list.GetValueLength(-1));
105+
Assert.Throws<ArgumentOutOfRangeException>(() => list.GetValueLength(4));
106+
107+
listBuilder.Resize(2);
108+
var truncatedList = listBuilder.Build();
109+
110+
Assert.Equal(
111+
new List<string> { "22", "33", "444", null, "555", "666" },
112+
ConvertStringArrayToList(truncatedList.GetSlicedValues(2) as StringArray));
113+
114+
Assert.Throws<ArgumentOutOfRangeException>(() => truncatedList.GetSlicedValues(-1));
115+
Assert.Throws<ArgumentOutOfRangeException>(() => truncatedList.GetSlicedValues(3));
116+
117+
listBuilder.Clear();
118+
var emptyList = listBuilder.Build();
119+
120+
Assert.Equal(0, emptyList.Length);
121+
104122
List<string> ConvertStringArrayToList(StringArray array)
105123
{
106124
var length = array.Length;

csharp/test/Apache.Arrow.Tests/ArrowFileReaderTests.cs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515

1616
using Apache.Arrow.Ipc;
1717
using Apache.Arrow.Memory;
18+
using Apache.Arrow.Types;
1819
using System;
20+
using System.Collections.Generic;
1921
using System.IO;
2022
using System.Threading.Tasks;
2123
using Xunit;
@@ -155,5 +157,19 @@ public async Task TestReadMultipleRecordBatchAsync()
155157
ArrowReaderVerifier.CompareBatches(originalBatch1, readBatch3);
156158
}
157159
}
160+
161+
/// <summary>
/// Checks basic <c>RecordBatch</c> behavior: a negative length is rejected by the
/// constructor, and a column can be fetched both by index and by name.
/// </summary>
[Fact]
public void TestRecordBatchBasics()
{
    // The using statement disposes the batch even when an assertion below throws.
    using (RecordBatch recordBatch = TestData.CreateSampleRecordBatch(length: 1))
    {
        Assert.Throws<ArgumentOutOfRangeException>(() => new RecordBatch(recordBatch.Schema, recordBatch.Arrays, -1));

        // NOTE(review): presumably the sample batch's first column is named "list0",
        // so both lookups should yield the same data - confirm against TestData.
        var col1 = recordBatch.Column(0);
        var col2 = recordBatch.Column("list0");
        ArrowReaderVerifier.CompareArrays(col1, col2);
    }
}
173+
158174
}
159175
}

csharp/test/Apache.Arrow.Tests/TableTests.cs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,34 @@ public void TestTableBasics()
5151
Assert.Equal(1, table.ColumnCount);
5252
}
5353

54+
/// <summary>
/// Builds a table from two sample record batches and verifies its dimensions, then
/// verifies that building a table with a schema whose field was swapped for a
/// different data type throws <see cref="ArgumentException"/>.
/// </summary>
[Fact]
public void TestTableFromRecordBatches()
{
    RecordBatch firstBatch = TestData.CreateSampleRecordBatch(length: 10, true);
    RecordBatch secondBatch = TestData.CreateSampleRecordBatch(length: 10, true);
    IList<RecordBatch> batches = new List<RecordBatch>() { firstBatch, secondBatch };

    // Two batches of 10 rows each.
    Table table = Table.TableFromRecordBatches(firstBatch.Schema, batches);
    Assert.Equal(20, table.RowCount);
    Assert.Equal(21, table.ColumnCount);

    // Replace field 20 with a fixed-size binary field of width 17.
    // NOTE(review): presumably this differs from the sample data's type at that index - confirm.
    var fixedSizeBinaryType = new FixedSizeBinaryType(17);
    var fixedSizeBinaryField = new Field(fixedSizeBinaryType.Name, fixedSizeBinaryType, false);
    Schema fixedSizeBinarySchema = firstBatch.Schema.SetField(20, fixedSizeBinaryField);
    Assert.Throws<ArgumentException>(() => Table.TableFromRecordBatches(fixedSizeBinarySchema, batches));

    // Replace field 16 with a struct of two nullable children ("Ints", "Strings").
    var structChildren = new List<Field>
    {
        new Field.Builder().Name("Ints").DataType(Int32Type.Default).Nullable(true).Build(),
        new Field.Builder().Name("Strings").DataType(StringType.Default).Nullable(true).Build(),
    };
    var structType = new StructType(structChildren);
    var structField = new Field(structType.Name, structType, false);
    Schema structSchema = firstBatch.Schema.SetField(16, structField);
    Assert.Throws<ArgumentException>(() => Table.TableFromRecordBatches(structSchema, batches));
}
81+
5482
[Fact]
5583
public void TestTableAddRemoveAndSetColumn()
5684
{

0 commit comments

Comments
 (0)