@@ -224,7 +224,7 @@ std::shared_ptr<Table> GenerateRandomTable(const std::shared_ptr<Schema>& schema
224224 return Table::Make (schema, cv);
225225}
226226
227- void AssertTableWriteReadEqual (const std::shared_ptr<Table>& input_table ,
227+ void AssertTableWriteReadEqual (const std::vector<std:: shared_ptr<Table>>& input_tables ,
228228 const std::shared_ptr<Table>& expected_output_table,
229229 const int64_t max_size = kDefaultSmallMemStreamSize ,
230230 std::vector<int >* opt_selected_read_indices = nullptr ) {
@@ -241,7 +241,9 @@ void AssertTableWriteReadEqual(const std::shared_ptr<Table>& input_table,
241241 write_options.row_index_stride = 5000 ;
242242 EXPECT_OK_AND_ASSIGN (auto writer, adapters::orc::ORCFileWriter::Open (
243243 buffer_output_stream.get (), write_options));
244- ARROW_EXPECT_OK (writer->Write (*input_table));
244+ for (const auto & input_table : input_tables) {
245+ ARROW_EXPECT_OK (writer->Write (*input_table));
246+ }
245247 ARROW_EXPECT_OK (writer->Close ());
246248 EXPECT_OK_AND_ASSIGN (auto buffer, buffer_output_stream->Finish ());
247249 std::shared_ptr<io::RandomAccessFile> in_stream (new io::BufferReader (buffer));
@@ -259,6 +261,48 @@ void AssertTableWriteReadEqual(const std::shared_ptr<Table>& input_table,
259261 AssertTablesEqual (*expected_output_table, *actual_output_table, false , false );
260262}
261263
264+ void AssertBatchWriteReadEqual (
265+ const std::vector<std::shared_ptr<RecordBatch>>& input_batches,
266+ const std::shared_ptr<Table>& expected_output_table,
267+ const int64_t max_size = kDefaultSmallMemStreamSize ) {
268+ EXPECT_OK_AND_ASSIGN (auto buffer_output_stream,
269+ io::BufferOutputStream::Create (max_size));
270+ auto write_options = adapters::orc::WriteOptions ();
271+ #ifdef ARROW_WITH_SNAPPY
272+ write_options.compression = Compression::SNAPPY ;
273+ #else
274+ write_options.compression = Compression::UNCOMPRESSED ;
275+ #endif
276+ write_options.file_version = adapters::orc::FileVersion (0 , 11 );
277+ write_options.compression_block_size = 32768 ;
278+ write_options.row_index_stride = 5000 ;
279+ EXPECT_OK_AND_ASSIGN (auto writer, adapters::orc::ORCFileWriter::Open (
280+ buffer_output_stream.get (), write_options));
281+ for (auto & input_batch : input_batches) {
282+ ARROW_EXPECT_OK (writer->Write (*input_batch));
283+ }
284+ ARROW_EXPECT_OK (writer->Close ());
285+ EXPECT_OK_AND_ASSIGN (auto buffer, buffer_output_stream->Finish ());
286+ std::shared_ptr<io::RandomAccessFile> in_stream (new io::BufferReader (buffer));
287+ EXPECT_OK_AND_ASSIGN (
288+ auto reader, adapters::orc::ORCFileReader::Open (in_stream, default_memory_pool ()));
289+ ASSERT_EQ (reader->GetFileVersion (), write_options.file_version );
290+ ASSERT_EQ (reader->GetCompression (), write_options.compression );
291+ ASSERT_EQ (reader->GetCompressionSize (), write_options.compression_block_size );
292+ ASSERT_EQ (reader->GetRowIndexStride (), write_options.row_index_stride );
293+ EXPECT_OK_AND_ASSIGN (auto actual_output_table, reader->Read ());
294+ AssertTablesEqual (*expected_output_table, *actual_output_table, false , false );
295+ }
296+
297+ void AssertTableWriteReadEqual (const std::shared_ptr<Table>& input_table,
298+ const std::shared_ptr<Table>& expected_output_table,
299+ const int64_t max_size = kDefaultSmallMemStreamSize ,
300+ std::vector<int >* opt_selected_read_indices = nullptr ) {
301+ std::vector<std::shared_ptr<Table>> input_tables;
302+ input_tables.push_back (input_table);
303+ AssertTableWriteReadEqual (input_tables, expected_output_table, max_size,
304+ opt_selected_read_indices);
305+ }
262306void AssertArrayWriteReadEqual (const std::shared_ptr<Array>& input_array,
263307 const std::shared_ptr<Array>& expected_output_array,
264308 const int64_t max_size = kDefaultSmallMemStreamSize ) {
@@ -767,4 +811,69 @@ TEST_F(TestORCWriterSingleArray, WriteListOfMap) {
767811 AssertArrayWriteReadEqual (array, array, kDefaultSmallMemStreamSize * 10 );
768812}
769813
814+ class TestORCWriterMultipleWrite : public ::testing::Test {
815+ public:
816+ TestORCWriterMultipleWrite () : rand(kRandomSeed ) {}
817+
818+ protected:
819+ random::RandomArrayGenerator rand;
820+ };
821+
822+ TEST_F (TestORCWriterMultipleWrite, MultipleWritesIntField) {
823+ const int64_t num_rows = 1234 ;
824+ const int num_writes = 5 ;
825+ std::shared_ptr<Schema> input_schema = schema ({field (" col0" , int32 ())});
826+ ArrayVector vect;
827+ std::vector<std::shared_ptr<Table>> input_tables;
828+ for (int i = 0 ; i < num_writes; i++) {
829+ auto array_int = rand.ArrayOf (int32 (), num_rows, 0 );
830+ vect.push_back (array_int);
831+ auto input_chunked_array = std::make_shared<ChunkedArray>(array_int);
832+ input_tables.emplace_back (Table::Make (input_schema, {input_chunked_array}));
833+ }
834+ auto expected_output_chunked_array = std::make_shared<ChunkedArray>(vect);
835+ std::shared_ptr<Table> expected_output_table =
836+ Table::Make (input_schema, {expected_output_chunked_array});
837+ AssertTableWriteReadEqual (input_tables, expected_output_table,
838+ kDefaultSmallMemStreamSize * 100 );
839+ }
840+
841+ TEST_F (TestORCWriterMultipleWrite, MultipleWritesIncoherentSchema) {
842+ const int64_t num_rows = 1234 ;
843+ auto array_int = rand.ArrayOf (int32 (), num_rows, 0 );
844+ std::shared_ptr<Schema> input_schema = schema ({field (" col0" , array_int->type ())});
845+ auto array_int2 = rand.ArrayOf (int64 (), num_rows, 0 );
846+ std::shared_ptr<Schema> input_schema2 = schema ({field (" col0" , array_int2->type ())});
847+
848+ std::shared_ptr<Table> input_table = Table::Make (input_schema, {array_int});
849+ std::shared_ptr<Table> input_table2 = Table::Make (input_schema2, {array_int2});
850+ EXPECT_OK_AND_ASSIGN (auto buffer_output_stream,
851+ io::BufferOutputStream::Create (kDefaultSmallMemStreamSize ));
852+ auto write_options = adapters::orc::WriteOptions ();
853+ EXPECT_OK_AND_ASSIGN (auto writer, adapters::orc::ORCFileWriter::Open (
854+ buffer_output_stream.get (), write_options));
855+ ARROW_EXPECT_OK (writer->Write (*input_table));
856+
857+ // This should not pass
858+ ASSERT_RAISES (TypeError, writer->Write (*input_table2));
859+
860+ ARROW_EXPECT_OK (writer->Close ());
861+ }
862+ TEST_F (TestORCWriterMultipleWrite, MultipleWritesIntFieldRecordBatch) {
863+ const int64_t num_rows = 1234 ;
864+ const int num_writes = 5 ;
865+ std::shared_ptr<Schema> input_schema = schema ({field (" col0" , int32 ())});
866+ ArrayVector vect;
867+ std::vector<std::shared_ptr<RecordBatch>> input_batches;
868+ for (int i = 0 ; i < num_writes; i++) {
869+ auto array_int = rand.ArrayOf (int32 (), num_rows, 0 );
870+ vect.push_back (array_int);
871+ input_batches.emplace_back (RecordBatch::Make (input_schema, num_rows, {array_int}));
872+ }
873+ auto expected_output_chunked_array = std::make_shared<ChunkedArray>(vect);
874+ std::shared_ptr<Table> expected_output_table =
875+ Table::Make (input_schema, {expected_output_chunked_array});
876+ AssertBatchWriteReadEqual (input_batches, expected_output_table,
877+ kDefaultSmallMemStreamSize * 100 );
878+ }
770879} // namespace arrow
0 commit comments