|
25 | 25 |
|
26 | 26 | #include <cstdint> |
27 | 27 | #include <functional> |
28 | | -#include <iostream> |
29 | 28 | #include <sstream> |
30 | 29 | #include <vector> |
31 | 30 |
|
@@ -3432,6 +3431,25 @@ TEST(ArrowReadWrite, NestedRequiredOuterOptionalDecimal) { |
3432 | 3431 | } |
3433 | 3432 | } |
3434 | 3433 |
|
| 3434 | +TEST(ArrowReadWrite, Decimal256AsInt) { |
| 3435 | + using ::arrow::Decimal256; |
| 3436 | + using ::arrow::field; |
| 3437 | + |
| 3438 | + auto type = ::arrow::decimal256(8, 4); |
| 3439 | + |
| 3440 | + const char* json = R"(["1.0000", null, "-1.2345", "-1000.5678", |
| 3441 | + "-9999.9999", "9999.9999"])"; |
| 3442 | + auto array = ::arrow::ArrayFromJSON(type, json); |
| 3443 | + auto table = ::arrow::Table::Make(::arrow::schema({field("root", type)}), {array}); |
| 3444 | + |
| 3445 | + parquet::WriterProperties::Builder builder; |
| 3446 | + // Enforce integer type to annotate decimal type |
| 3447 | + auto writer_properties = builder.enable_integer_annotate_decimal()->build(); |
| 3448 | + auto props_store_schema = ArrowWriterProperties::Builder().store_schema()->build(); |
| 3449 | + |
| 3450 | + CheckConfiguredRoundtrip(table, table, writer_properties, props_store_schema); |
| 3451 | +} |
| 3452 | + |
3435 | 3453 | class TestNestedSchemaRead : public ::testing::TestWithParam<Repetition::type> { |
3436 | 3454 | protected: |
3437 | 3455 | // make it *3 to make it easily divisible by 3 |
@@ -4796,5 +4814,83 @@ std::vector<NestedFilterTestCase> GenerateMapFilteredTestCases() { |
4796 | 4814 | INSTANTIATE_TEST_SUITE_P(MapFilteredReads, TestNestedSchemaFilteredReader, |
4797 | 4815 | ::testing::ValuesIn(GenerateMapFilteredTestCases())); |
4798 | 4816 |
|
| 4817 | +template <typename TestType> |
| 4818 | +class TestIntegerAnnotateDecimalTypeParquetIO : public TestParquetIO<TestType> { |
| 4819 | + public: |
| 4820 | + void WriteColumn(const std::shared_ptr<Array>& values) { |
| 4821 | + auto arrow_schema = ::arrow::schema({::arrow::field("a", values->type())}); |
| 4822 | + |
| 4823 | + parquet::WriterProperties::Builder builder; |
| 4824 | + // Enforce integer type to annotate decimal type |
| 4825 | + auto writer_properties = builder.enable_integer_annotate_decimal()->build(); |
| 4826 | + std::shared_ptr<SchemaDescriptor> parquet_schema; |
| 4827 | + ASSERT_OK_NO_THROW(ToParquetSchema(arrow_schema.get(), *writer_properties, |
| 4828 | + *default_arrow_writer_properties(), |
| 4829 | + &parquet_schema)); |
| 4830 | + |
| 4831 | + this->sink_ = CreateOutputStream(); |
| 4832 | + auto schema_node = std::static_pointer_cast<GroupNode>(parquet_schema->schema_root()); |
| 4833 | + |
| 4834 | + std::unique_ptr<FileWriter> writer; |
| 4835 | + ASSERT_OK_NO_THROW(FileWriter::Make( |
| 4836 | + ::arrow::default_memory_pool(), |
| 4837 | + ParquetFileWriter::Open(this->sink_, schema_node, writer_properties), |
| 4838 | + arrow_schema, default_arrow_writer_properties(), &writer)); |
| 4839 | + ASSERT_OK_NO_THROW(writer->NewRowGroup(values->length())); |
| 4840 | + ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*values)); |
| 4841 | + ASSERT_OK_NO_THROW(writer->Close()); |
| 4842 | + } |
| 4843 | + |
| 4844 | + void ReadAndCheckSingleDecimalColumnFile(const Array& values) { |
| 4845 | + std::shared_ptr<Array> out; |
| 4846 | + std::unique_ptr<FileReader> reader; |
| 4847 | + this->ReaderFromSink(&reader); |
| 4848 | + this->ReadSingleColumnFile(std::move(reader), &out); |
| 4849 | + |
| 4850 | + // Reader always read values as DECIMAL128 type |
| 4851 | + ASSERT_EQ(out->type()->id(), ::arrow::Type::DECIMAL128); |
| 4852 | + |
| 4853 | + if (values.type()->id() == ::arrow::Type::DECIMAL128) { |
| 4854 | + AssertArraysEqual(values, *out); |
| 4855 | + } else { |
| 4856 | + auto& expected_values = dynamic_cast<const ::arrow::Decimal256Array&>(values); |
| 4857 | + auto& read_values = dynamic_cast<const ::arrow::Decimal128Array&>(*out); |
| 4858 | + ASSERT_EQ(expected_values.length(), read_values.length()); |
| 4859 | + ASSERT_EQ(expected_values.null_count(), read_values.null_count()); |
| 4860 | + ASSERT_EQ(expected_values.length(), read_values.length()); |
| 4861 | + for (int64_t i = 0; i < expected_values.length(); ++i) { |
| 4862 | + ASSERT_EQ(expected_values.IsNull(i), read_values.IsNull(i)); |
| 4863 | + if (!expected_values.IsNull(i)) { |
| 4864 | + ASSERT_EQ(::arrow::Decimal256(expected_values.Value(i)).ToString(0), |
| 4865 | + ::arrow::Decimal128(read_values.Value(i)).ToString(0)); |
| 4866 | + } |
| 4867 | + } |
| 4868 | + } |
| 4869 | + } |
| 4870 | +}; |
| 4871 | + |
| 4872 | +typedef ::testing::Types< |
| 4873 | + DecimalWithPrecisionAndScale<1>, DecimalWithPrecisionAndScale<5>, |
| 4874 | + DecimalWithPrecisionAndScale<10>, DecimalWithPrecisionAndScale<18>, |
| 4875 | + Decimal256WithPrecisionAndScale<1>, Decimal256WithPrecisionAndScale<5>, |
| 4876 | + Decimal256WithPrecisionAndScale<10>, Decimal256WithPrecisionAndScale<18>> |
| 4877 | + DecimalTestTypes; |
| 4878 | + |
| 4879 | +TYPED_TEST_SUITE(TestIntegerAnnotateDecimalTypeParquetIO, DecimalTestTypes); |
| 4880 | + |
| 4881 | +TYPED_TEST(TestIntegerAnnotateDecimalTypeParquetIO, SingleNonNullableDecimalColumn) { |
| 4882 | + std::shared_ptr<Array> values; |
| 4883 | + ASSERT_OK(NonNullArray<TypeParam>(SMALL_SIZE, &values)); |
| 4884 | + ASSERT_NO_FATAL_FAILURE(this->WriteColumn(values)); |
| 4885 | + ASSERT_NO_FATAL_FAILURE(this->ReadAndCheckSingleDecimalColumnFile(*values)); |
| 4886 | +} |
| 4887 | + |
| 4888 | +TYPED_TEST(TestIntegerAnnotateDecimalTypeParquetIO, SingleNullableDecimalColumn) { |
| 4889 | + std::shared_ptr<Array> values; |
| 4890 | + ASSERT_OK(NullableArray<TypeParam>(SMALL_SIZE, SMALL_SIZE / 2, kDefaultSeed, &values)); |
| 4891 | + ASSERT_NO_FATAL_FAILURE(this->WriteColumn(values)); |
| 4892 | + ASSERT_NO_FATAL_FAILURE(this->ReadAndCheckSingleDecimalColumnFile(*values)); |
| 4893 | +} |
| 4894 | + |
4799 | 4895 | } // namespace arrow |
4800 | 4896 | } // namespace parquet |
0 commit comments