Skip to content

Commit 7d433dc

Browse files
cpcloud authored and wesm committed
ARROW-483: [C++/Python] Provide access to "custom_metadata" Field attribute in IPC setting
Author: Phillip Cloud <cpcloud@gmail.com> Closes apache#588 from cpcloud/ARROW-483 and squashes the following commits: f671ba4 [Phillip Cloud] ARROW-483: [C++/Python] Provide access to "custom_metadata" Field attribute in IPC setting
1 parent 949249d commit 7d433dc

18 files changed

Lines changed: 401 additions & 43 deletions

cpp/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -944,6 +944,7 @@ set(ARROW_SRCS
944944
945945
src/arrow/util/bit-util.cc
946946
src/arrow/util/decimal.cc
947+
src/arrow/util/key_value_metadata.cc
947948
)
948949
949950
if (ARROW_IPC)

cpp/src/arrow/array.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ Status Array::Validate() const {
113113
static inline void ConformSliceParams(
114114
int64_t array_offset, int64_t array_length, int64_t* offset, int64_t* length) {
115115
DCHECK_LE(*offset, array_length);
116-
DCHECK_GE(offset, 0);
116+
DCHECK_NE(offset, nullptr);
117117
*length = std::min(array_length - *offset, *length);
118118
*offset = array_offset + *offset;
119119
}

cpp/src/arrow/builder.cc

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -363,8 +363,6 @@ ARROW_EXPORT Status DecimalBuilder::Append(const decimal::Decimal128& value) {
363363
return Status::OK();
364364
}
365365

366-
template ARROW_EXPORT Status DecimalBuilder::Append(const decimal::Decimal128& val);
367-
368366
Status DecimalBuilder::Init(int64_t capacity) {
369367
RETURN_NOT_OK(FixedSizeBinaryBuilder::Init(capacity));
370368
if (byte_width_ == 16) {
@@ -408,16 +406,17 @@ Status DecimalBuilder::Finish(std::shared_ptr<Array>* out) {
408406

409407
// Creates a list builder whose child values are produced by `value_builder`.
// If `type` is null, a ListType wrapping the child builder's type is used.
ListBuilder::ListBuilder(MemoryPool* pool, std::shared_ptr<ArrayBuilder> value_builder,
    const std::shared_ptr<DataType>& type)
    : ArrayBuilder(pool,
          type != nullptr ? type
                          : std::shared_ptr<DataType>(
                                std::make_shared<ListType>(value_builder->type()))),
      offset_builder_(pool),
      value_builder_(value_builder) {}
416414

417415
// Creates a list builder over an already-built child array `values`.
// If `type` is null, a ListType wrapping the child array's type is used.
ListBuilder::ListBuilder(MemoryPool* pool, std::shared_ptr<Array> values,
    const std::shared_ptr<DataType>& type)
    : ArrayBuilder(pool,
          type != nullptr
              ? type
              : std::shared_ptr<DataType>(std::make_shared<ListType>(values->type()))),
      offset_builder_(pool),
      values_(values) {}
423422

cpp/src/arrow/ipc/metadata.cc

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ namespace ipc {
4545
using FBB = flatbuffers::FlatBufferBuilder;
4646
using DictionaryOffset = flatbuffers::Offset<flatbuf::DictionaryEncoding>;
4747
using FieldOffset = flatbuffers::Offset<flatbuf::Field>;
48+
using KeyValueOffset = flatbuffers::Offset<flatbuf::KeyValue>;
4849
using RecordBatchOffset = flatbuffers::Offset<flatbuf::RecordBatch>;
4950
using VectorLayoutOffset = flatbuffers::Offset<arrow::flatbuf::VectorLayout>;
5051
using Offset = flatbuffers::Offset<void>;
@@ -583,6 +584,7 @@ flatbuf::Endianness endianness() {
583584

584585
static Status SchemaToFlatbuffer(FBB& fbb, const Schema& schema,
585586
DictionaryMemo* dictionary_memo, flatbuffers::Offset<flatbuf::Schema>* out) {
587+
/// Fields
586588
std::vector<FieldOffset> field_offsets;
587589
for (int i = 0; i < schema.num_fields(); ++i) {
588590
std::shared_ptr<Field> field = schema.field(i);
@@ -591,7 +593,20 @@ static Status SchemaToFlatbuffer(FBB& fbb, const Schema& schema,
591593
field_offsets.push_back(offset);
592594
}
593595

594-
*out = flatbuf::CreateSchema(fbb, endianness(), fbb.CreateVector(field_offsets));
596+
/// Custom metadata
597+
const auto& custom_metadata_ = schema.custom_metadata();
598+
std::vector<KeyValueOffset> key_value_offsets;
599+
size_t metadata_size = custom_metadata_.size();
600+
key_value_offsets.reserve(metadata_size);
601+
for (size_t i = 0; i < metadata_size; ++i) {
602+
const auto& key = custom_metadata_.key(i);
603+
const auto& value = custom_metadata_.value(i);
604+
key_value_offsets.push_back(
605+
flatbuf::CreateKeyValue(fbb, fbb.CreateString(key), fbb.CreateString(value)));
606+
}
607+
608+
*out = flatbuf::CreateSchema(fbb, endianness(), fbb.CreateVector(field_offsets),
609+
fbb.CreateVector(key_value_offsets));
595610
return Status::OK();
596611
}
597612

@@ -939,7 +954,18 @@ Status GetSchema(const void* opaque_schema, const DictionaryMemo& dictionary_mem
939954
const flatbuf::Field* field = schema->fields()->Get(i);
940955
RETURN_NOT_OK(FieldFromFlatbuffer(field, dictionary_memo, &fields[i]));
941956
}
942-
*out = std::make_shared<Schema>(fields);
957+
958+
KeyValueMetadata custom_metadata;
959+
auto fb_metadata = schema->custom_metadata();
960+
if (fb_metadata != nullptr) {
961+
custom_metadata.reserve(fb_metadata->size());
962+
963+
for (const auto& pair : *fb_metadata) {
964+
custom_metadata.Append(pair->key()->str(), pair->value()->str());
965+
}
966+
}
967+
968+
*out = std::make_shared<Schema>(fields, custom_metadata);
943969
return Status::OK();
944970
}
945971

cpp/src/arrow/type-test.cc

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,40 @@ TEST_F(TestSchema, GetFieldByName) {
117117
ASSERT_TRUE(result == nullptr);
118118
}
119119

120+
TEST_F(TestSchema, TestCustomMetadataConstruction) {
121+
auto f0 = field("f0", int32());
122+
auto f1 = field("f1", uint8(), false);
123+
auto f2 = field("f2", utf8());
124+
vector<shared_ptr<Field>> fields = {f0, f1, f2};
125+
KeyValueMetadata metadata({"foo", "bar"}, {"bizz", "buzz"});
126+
auto schema = std::make_shared<Schema>(fields, metadata);
127+
ASSERT_TRUE(metadata.Equals(schema->custom_metadata()));
128+
}
129+
130+
// Attaching metadata to a schema built without any must yield a new schema
// that carries exactly that metadata.
TEST_F(TestSchema, TestAddCustomMetadata) {
  auto f0 = field("f0", int32());
  auto f1 = field("f1", uint8(), false);
  auto f2 = field("f2", utf8());
  vector<shared_ptr<Field>> fields = {f0, f1, f2};
  KeyValueMetadata metadata({"foo", "bar"}, {"bizz", "buzz"});
  auto schema = std::make_shared<Schema>(fields);
  std::shared_ptr<Schema> new_schema;
  // Check the returned Status instead of discarding it, so a failed call is
  // reported here rather than as a null dereference on the next line.
  ASSERT_TRUE(schema->AddCustomMetadata(metadata, &new_schema).ok());
  ASSERT_TRUE(metadata.Equals(new_schema->custom_metadata()));
}
141+
142+
// Removing metadata from a schema that actually has some must yield a new
// schema with none.
TEST_F(TestSchema, TestRemoveCustomMetadata) {
  auto f0 = field("f0", int32());
  auto f1 = field("f1", uint8(), false);
  auto f2 = field("f2", utf8());
  vector<shared_ptr<Field>> fields = {f0, f1, f2};
  KeyValueMetadata metadata({"foo", "bar"}, {"bizz", "buzz"});
  // Build the schema WITH metadata: starting from a metadata-free schema
  // would let this test pass even if RemoveCustomMetadata did nothing.
  auto schema = std::make_shared<Schema>(fields, metadata);
  ASSERT_TRUE(metadata.Equals(schema->custom_metadata()));
  std::shared_ptr<Schema> new_schema;
  ASSERT_TRUE(schema->RemoveCustomMetadata(&new_schema).ok());
  ASSERT_EQ(0, new_schema->custom_metadata().size());
}
153+
120154
#define PRIMITIVE_TEST(KLASS, ENUM, NAME) \
121155
TEST(TypesTest, TestPrimitive_##ENUM) { \
122156
KLASS tp; \

cpp/src/arrow/type.cc

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "arrow/array.h"
2525
#include "arrow/compare.h"
2626
#include "arrow/status.h"
27+
#include "arrow/util/key_value_metadata.h"
2728
#include "arrow/util/logging.h"
2829
#include "arrow/util/stl.h"
2930
#include "arrow/visitor.h"
@@ -231,7 +232,9 @@ std::string NullType::ToString() const {
231232
// ----------------------------------------------------------------------
232233
// Schema implementation
233234

234-
Schema::Schema(const std::vector<std::shared_ptr<Field>>& fields) : fields_(fields) {}
235+
// Builds a schema from an ordered list of fields and schema-level key/value
// metadata. Both arguments are copied into the new object.
Schema::Schema(
    const std::vector<std::shared_ptr<Field>>& fields, const KeyValueMetadata& custom_metadata)
    : fields_(fields),
      custom_metadata_(custom_metadata) {}
235238

236239
bool Schema::Equals(const Schema& other) const {
237240
if (this == &other) { return true; }
@@ -263,15 +266,26 @@ Status Schema::AddField(
263266
DCHECK_GE(i, 0);
264267
DCHECK_LE(i, this->num_fields());
265268

266-
*out = std::make_shared<Schema>(AddVectorElement(fields_, i, field));
269+
*out = std::make_shared<Schema>(AddVectorElement(fields_, i, field), custom_metadata_);
270+
return Status::OK();
271+
}
272+
273+
Status Schema::AddCustomMetadata(
274+
const KeyValueMetadata& custom_metadata, std::shared_ptr<Schema>* out) const {
275+
*out = std::make_shared<Schema>(fields_, custom_metadata);
276+
return Status::OK();
277+
}
278+
279+
// Produces a copy of this schema that has the same fields and no custom
// metadata.
//
// \param[out] out receives the newly created schema
// \return always Status::OK()
//
// NOTE(review): unlike AddCustomMetadata and RemoveField, this member is not
// const-qualified even though it does not modify the schema; making it const
// requires changing the declaration in type.h at the same time.
Status Schema::RemoveCustomMetadata(std::shared_ptr<Schema>* out) {
  // The constructor's metadata argument defaults to an empty KeyValueMetadata.
  *out = std::make_shared<Schema>(fields_);
  return Status::OK();
}
269283

270284
// Produces a copy of this schema with the field at position `i` dropped.
// The schema's custom metadata is carried over to the copy unchanged.
//
// \param[in] i index of the field to drop; debug builds assert
//   0 <= i < num_fields()
// \param[out] out receives the newly created schema
// \return always Status::OK()
Status Schema::RemoveField(int i, std::shared_ptr<Schema>* out) const {
  DCHECK_GE(i, 0);
  DCHECK_LT(i, this->num_fields());

  const auto& remaining_fields = DeleteVectorElement(fields_, i);
  *out = std::make_shared<Schema>(remaining_fields, custom_metadata_);
  return Status::OK();
}
277291

cpp/src/arrow/type.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
#include "arrow/status.h"
3030
#include "arrow/type_fwd.h"
31+
#include "arrow/util/key_value_metadata.h"
3132
#include "arrow/util/macros.h"
3233
#include "arrow/util/visibility.h"
3334
#include "arrow/visitor.h"
@@ -677,7 +678,8 @@ class ARROW_EXPORT DictionaryType : public FixedWidthType {
677678

678679
class ARROW_EXPORT Schema {
679680
public:
680-
explicit Schema(const std::vector<std::shared_ptr<Field>>& fields);
681+
explicit Schema(const std::vector<std::shared_ptr<Field>>& fields,
682+
const KeyValueMetadata& custom_metadata = KeyValueMetadata());
681683

682684
// Returns true if all of the schema fields are equal
683685
bool Equals(const Schema& other) const;
@@ -689,6 +691,7 @@ class ARROW_EXPORT Schema {
689691
std::shared_ptr<Field> GetFieldByName(const std::string& name);
690692

691693
const std::vector<std::shared_ptr<Field>>& fields() const { return fields_; }
694+
const KeyValueMetadata& custom_metadata() const { return custom_metadata_; }
692695

693696
// Render a string representation of the schema suitable for debugging
694697
std::string ToString() const;
@@ -697,11 +700,16 @@ class ARROW_EXPORT Schema {
697700
int i, const std::shared_ptr<Field>& field, std::shared_ptr<Schema>* out) const;
698701
Status RemoveField(int i, std::shared_ptr<Schema>* out) const;
699702

703+
Status AddCustomMetadata(
704+
const KeyValueMetadata& metadata, std::shared_ptr<Schema>* out) const;
705+
Status RemoveCustomMetadata(std::shared_ptr<Schema>* out);
706+
700707
int num_fields() const { return static_cast<int>(fields_.size()); }
701708

702709
private:
703710
std::vector<std::shared_ptr<Field>> fields_;
704711
std::unordered_map<std::string, int> name_to_index_;
712+
KeyValueMetadata custom_metadata_;
705713
};
706714

707715
// ----------------------------------------------------------------------

cpp/src/arrow/util/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ install(FILES
2626
macros.h
2727
random.h
2828
visibility.h
29+
key_value_metadata.h
2930
DESTINATION include/arrow/util)
3031

3132
#######################################
@@ -52,3 +53,4 @@ endif()
5253
ADD_ARROW_TEST(bit-util-test)
5354
ADD_ARROW_TEST(stl-util-test)
5455
ADD_ARROW_TEST(decimal-test)
56+
ADD_ARROW_TEST(key-value-metadata-test)

cpp/src/arrow/util/key-value-metadata-test.cc

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include "gtest/gtest.h"
19+
20+
#include "arrow/util/key_value_metadata.h"
21+
22+
#include "arrow/test-util.h"
23+
24+
namespace arrow {
25+
26+
TEST(KeyValueMetadataTest, SimpleConstruction) {
27+
KeyValueMetadata metadata;
28+
ASSERT_EQ(0, metadata.size());
29+
}
30+
31+
TEST(KeyValueMetadataTest, StringVectorConstruction) {
32+
std::vector<std::string> keys = {"foo", "bar"};
33+
std::vector<std::string> values = {"bizz", "buzz"};
34+
35+
KeyValueMetadata metadata(keys, values);
36+
ASSERT_EQ("foo", metadata.key(0));
37+
ASSERT_EQ("bar", metadata.key(1));
38+
ASSERT_EQ("bizz", metadata.value(0));
39+
ASSERT_EQ("buzz", metadata.value(1));
40+
ASSERT_EQ(2, metadata.size());
41+
}
42+
43+
TEST(KeyValueMetadataTest, StringMapConstruction) {
44+
std::unordered_map<std::string, std::string> pairs = {{"foo", "bizz"}, {"bar", "buzz"}};
45+
std::unordered_map<std::string, std::string> result_map;
46+
result_map.reserve(pairs.size());
47+
48+
KeyValueMetadata metadata(pairs);
49+
metadata.ToUnorderedMap(&result_map);
50+
ASSERT_EQ(pairs, result_map);
51+
ASSERT_EQ(2, metadata.size());
52+
}
53+
54+
TEST(KeyValueMetadataTest, StringAppend) {
55+
std::vector<std::string> keys = {"foo", "bar"};
56+
std::vector<std::string> values = {"bizz", "buzz"};
57+
58+
KeyValueMetadata metadata(keys, values);
59+
ASSERT_EQ("foo", metadata.key(0));
60+
ASSERT_EQ("bar", metadata.key(1));
61+
ASSERT_EQ("bizz", metadata.value(0));
62+
ASSERT_EQ("buzz", metadata.value(1));
63+
ASSERT_EQ(2, metadata.size());
64+
65+
metadata.Append("purple", "orange");
66+
metadata.Append("blue", "red");
67+
68+
ASSERT_EQ("purple", metadata.key(2));
69+
ASSERT_EQ("blue", metadata.key(3));
70+
71+
ASSERT_EQ("orange", metadata.value(2));
72+
ASSERT_EQ("red", metadata.value(3));
73+
}
74+
75+
TEST(KeyValueMetadataTest, Equals) {
76+
std::vector<std::string> keys = {"foo", "bar"};
77+
std::vector<std::string> values = {"bizz", "buzz"};
78+
79+
KeyValueMetadata metadata(keys, values);
80+
KeyValueMetadata metadata2(keys, values);
81+
KeyValueMetadata metadata3(keys, {"buzz", "bizz"});
82+
83+
ASSERT_TRUE(metadata.Equals(metadata2));
84+
ASSERT_FALSE(metadata.Equals(metadata3));
85+
}
86+
87+
} // namespace arrow

0 commit comments

Comments
 (0)