Skip to content

Commit 1fb857f

Browse files
vibhathakou
andauthored
ARROW-16686: [C++] Use shared_ptr with FunctionOptions (apache#13344)
Includes changes to remove raw pointer usage with `FunctionOptions` used in `Aggregate`. This also includes changes in removing `keep_alives` from R aggregate bindings as well. Lead-authored-by: Vibhatha Abeykoon <vibhatha@gmail.com> Co-authored-by: Vibhatha Lakmal Abeykoon <vibhatha@users.noreply.github.com> Co-authored-by: Sutou Kouhei <kou@clear-code.com> Signed-off-by: David Li <li.davidm96@gmail.com>
1 parent ac0949b commit 1fb857f

11 files changed

Lines changed: 277 additions & 255 deletions

File tree

c_glib/arrow-glib/compute.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1265,7 +1265,14 @@ garrow_aggregate_node_options_new(GList *aggregations,
12651265
function_options =
12661266
garrow_function_options_get_raw(aggregation_priv->options);
12671267
};
1268-
arrow_aggregates.push_back({aggregation_priv->function, function_options});
1268+
if (function_options) {
1269+
arrow_aggregates.push_back({
1270+
aggregation_priv->function,
1271+
function_options->Copy(),
1272+
});
1273+
} else {
1274+
arrow_aggregates.push_back({aggregation_priv->function, nullptr});
1275+
};
12691276
if (!garrow_field_refs_add(arrow_targets,
12701277
aggregation_priv->input,
12711278
error,

cpp/examples/arrow/execution_plan_documentation_examples.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -507,7 +507,7 @@ arrow::Status SourceScalarAggregateSinkExample(cp::ExecContext& exec_context) {
507507
/*names=*/{"sum(a)"}};
508508
ARROW_ASSIGN_OR_RAISE(
509509
cp::ExecNode * aggregate,
510-
cp::MakeExecNode("aggregate", plan.get(), {source}, aggregate_options));
510+
cp::MakeExecNode("aggregate", plan.get(), {source}, std::move(aggregate_options)));
511511

512512
ARROW_RETURN_NOT_OK(
513513
cp::MakeExecNode("sink", plan.get(), {aggregate}, cp::SinkNodeOptions{&sink_gen}));
@@ -539,9 +539,9 @@ arrow::Status SourceGroupAggregateSinkExample(cp::ExecContext& exec_context) {
539539

540540
ARROW_ASSIGN_OR_RAISE(cp::ExecNode * source,
541541
cp::MakeExecNode("source", plan.get(), {}, source_node_options));
542-
cp::CountOptions options(cp::CountOptions::ONLY_VALID);
542+
auto options = std::make_shared<cp::CountOptions>(cp::CountOptions::ONLY_VALID);
543543
auto aggregate_options =
544-
cp::AggregateNodeOptions{/*aggregates=*/{{"hash_count", &options}},
544+
cp::AggregateNodeOptions{/*aggregates=*/{{"hash_count", options}},
545545
/*targets=*/{"a"},
546546
/*names=*/{"count(a)"},
547547
/*keys=*/{"b"}};

cpp/src/arrow/compute/api_aggregate.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ struct ARROW_EXPORT Aggregate {
401401
std::string function;
402402

403403
/// options for the aggregation function
404-
const FunctionOptions* options;
404+
std::shared_ptr<FunctionOptions> options;
405405
};
406406

407407
} // namespace internal

cpp/src/arrow/compute/exec/aggregate.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ Result<std::vector<std::unique_ptr<KernelState>>> InitKernels(
5555
std::vector<std::unique_ptr<KernelState>> states(kernels.size());
5656

5757
for (size_t i = 0; i < aggregates.size(); ++i) {
58-
auto options = aggregates[i].options;
58+
const FunctionOptions* options = aggregates[i].options.get();
5959

6060
if (options == nullptr) {
6161
// use known default options for the named function if possible

cpp/src/arrow/compute/exec/aggregate_node.cc

Lines changed: 15 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -43,18 +43,16 @@ namespace compute {
4343

4444
namespace {
4545

46-
void AggregatesToString(
47-
std::stringstream* ss, const Schema& input_schema,
48-
const std::vector<internal::Aggregate>& aggs,
49-
const std::vector<int>& target_field_ids,
50-
const std::vector<std::unique_ptr<FunctionOptions>>& owned_options, int indent = 0) {
46+
void AggregatesToString(std::stringstream* ss, const Schema& input_schema,
47+
const std::vector<internal::Aggregate>& aggs,
48+
const std::vector<int>& target_field_ids, int indent = 0) {
5149
*ss << "aggregates=[" << std::endl;
5250
for (size_t i = 0; i < aggs.size(); i++) {
5351
for (int j = 0; j < indent; ++j) *ss << " ";
5452
*ss << '\t' << aggs[i].function << '('
5553
<< input_schema.field(target_field_ids[i])->name();
56-
if (owned_options[i]) {
57-
*ss << ", " << owned_options[i]->ToString();
54+
if (aggs[i].options) {
55+
*ss << ", " << aggs[i].options->ToString();
5856
}
5957
*ss << ")," << std::endl;
6058
}
@@ -69,16 +67,14 @@ class ScalarAggregateNode : public ExecNode {
6967
std::vector<int> target_field_ids,
7068
std::vector<internal::Aggregate> aggs,
7169
std::vector<const ScalarAggregateKernel*> kernels,
72-
std::vector<std::vector<std::unique_ptr<KernelState>>> states,
73-
std::vector<std::unique_ptr<FunctionOptions>> owned_options)
70+
std::vector<std::vector<std::unique_ptr<KernelState>>> states)
7471
: ExecNode(plan, std::move(inputs), {"target"},
7572
/*output_schema=*/std::move(output_schema),
7673
/*num_outputs=*/1),
7774
target_field_ids_(std::move(target_field_ids)),
7875
aggs_(std::move(aggs)),
7976
kernels_(std::move(kernels)),
80-
states_(std::move(states)),
81-
owned_options_(std::move(owned_options)) {}
77+
states_(std::move(states)) {}
8278

8379
static Result<ExecNode*> Make(ExecPlan* plan, std::vector<ExecNode*> inputs,
8480
const ExecNodeOptions& options) {
@@ -95,7 +91,6 @@ class ScalarAggregateNode : public ExecNode {
9591
FieldVector fields(kernels.size());
9692
const auto& field_names = aggregate_options.names;
9793
std::vector<int> target_field_ids(kernels.size());
98-
std::vector<std::unique_ptr<FunctionOptions>> owned_options(aggregates.size());
9994

10095
for (size_t i = 0; i < kernels.size(); ++i) {
10196
ARROW_ASSIGN_OR_RAISE(auto match,
@@ -116,11 +111,7 @@ class ScalarAggregateNode : public ExecNode {
116111
kernels[i] = static_cast<const ScalarAggregateKernel*>(kernel);
117112

118113
if (aggregates[i].options == nullptr) {
119-
aggregates[i].options = function->default_options();
120-
}
121-
if (aggregates[i].options) {
122-
owned_options[i] = aggregates[i].options->Copy();
123-
aggregates[i].options = owned_options[i].get();
114+
aggregates[i].options = function->default_options()->Copy();
124115
}
125116

126117
KernelContext kernel_ctx{exec_ctx};
@@ -130,7 +121,7 @@ class ScalarAggregateNode : public ExecNode {
130121
{
131122
in_type,
132123
},
133-
aggregates[i].options},
124+
aggregates[i].options.get()},
134125
&states[i]));
135126

136127
// pick one to resolve the kernel signature
@@ -143,8 +134,7 @@ class ScalarAggregateNode : public ExecNode {
143134

144135
return plan->EmplaceNode<ScalarAggregateNode>(
145136
plan, std::move(inputs), schema(std::move(fields)), std::move(target_field_ids),
146-
std::move(aggregates), std::move(kernels), std::move(states),
147-
std::move(owned_options));
137+
std::move(aggregates), std::move(kernels), std::move(states));
148138
}
149139

150140
const char* kind_name() const override { return "ScalarAggregateNode"; }
@@ -242,7 +232,7 @@ class ScalarAggregateNode : public ExecNode {
242232
std::string ToStringExtra(int indent = 0) const override {
243233
std::stringstream ss;
244234
const auto input_schema = inputs_[0]->output_schema();
245-
AggregatesToString(&ss, *input_schema, aggs_, target_field_ids_, owned_options_);
235+
AggregatesToString(&ss, *input_schema, aggs_, target_field_ids_);
246236
return ss.str();
247237
}
248238

@@ -277,7 +267,6 @@ class ScalarAggregateNode : public ExecNode {
277267
const std::vector<const ScalarAggregateKernel*> kernels_;
278268

279269
std::vector<std::vector<std::unique_ptr<KernelState>>> states_;
280-
const std::vector<std::unique_ptr<FunctionOptions>> owned_options_;
281270

282271
ThreadIndexer get_thread_index_;
283272
AtomicCounter input_counter_;
@@ -288,16 +277,14 @@ class GroupByNode : public ExecNode {
288277
GroupByNode(ExecNode* input, std::shared_ptr<Schema> output_schema, ExecContext* ctx,
289278
std::vector<int> key_field_ids, std::vector<int> agg_src_field_ids,
290279
std::vector<internal::Aggregate> aggs,
291-
std::vector<const HashAggregateKernel*> agg_kernels,
292-
std::vector<std::unique_ptr<FunctionOptions>> owned_options)
280+
std::vector<const HashAggregateKernel*> agg_kernels)
293281
: ExecNode(input->plan(), {input}, {"groupby"}, std::move(output_schema),
294282
/*num_outputs=*/1),
295283
ctx_(ctx),
296284
key_field_ids_(std::move(key_field_ids)),
297285
agg_src_field_ids_(std::move(agg_src_field_ids)),
298286
aggs_(std::move(aggs)),
299-
agg_kernels_(std::move(agg_kernels)),
300-
owned_options_(std::move(owned_options)) {}
287+
agg_kernels_(std::move(agg_kernels)) {}
301288

302289
static Result<ExecNode*> Make(ExecPlan* plan, std::vector<ExecNode*> inputs,
303290
const ExecNodeOptions& options) {
@@ -363,17 +350,9 @@ class GroupByNode : public ExecNode {
363350
output_fields[base + i] = input_schema->field(key_field_id);
364351
}
365352

366-
std::vector<std::unique_ptr<FunctionOptions>> owned_options;
367-
owned_options.reserve(aggs.size());
368-
for (auto& agg : aggs) {
369-
owned_options.push_back(agg.options ? agg.options->Copy() : nullptr);
370-
agg.options = owned_options.back().get();
371-
}
372-
373353
return input->plan()->EmplaceNode<GroupByNode>(
374354
input, schema(std::move(output_fields)), ctx, std::move(key_field_ids),
375-
std::move(agg_src_field_ids), std::move(aggs), std::move(agg_kernels),
376-
std::move(owned_options));
355+
std::move(agg_src_field_ids), std::move(aggs), std::move(agg_kernels));
377356
}
378357

379358
const char* kind_name() const override { return "GroupByNode"; }
@@ -623,8 +602,7 @@ class GroupByNode : public ExecNode {
623602
ss << '"' << input_schema->field(key_field_ids_[i])->name() << '"';
624603
}
625604
ss << "], ";
626-
AggregatesToString(&ss, *input_schema, aggs_, agg_src_field_ids_, owned_options_,
627-
indent);
605+
AggregatesToString(&ss, *input_schema, aggs_, agg_src_field_ids_, indent);
628606
return ss.str();
629607
}
630608

@@ -684,8 +662,6 @@ class GroupByNode : public ExecNode {
684662
const std::vector<int> agg_src_field_ids_;
685663
const std::vector<internal::Aggregate> aggs_;
686664
const std::vector<const HashAggregateKernel*> agg_kernels_;
687-
// ARROW-13638: must hold owned copy of function options
688-
const std::vector<std::unique_ptr<FunctionOptions>> owned_options_;
689665

690666
ThreadIndexer get_thread_index_;
691667
AtomicCounter input_counter_, output_counter_;

cpp/src/arrow/compute/exec/plan_test.cc

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,8 @@ TEST(ExecPlan, ToString) {
375375
)");
376376

377377
ASSERT_OK_AND_ASSIGN(plan, ExecPlan::Make());
378-
CountOptions options(CountOptions::ONLY_VALID);
378+
std::shared_ptr<CountOptions> options =
379+
std::make_shared<CountOptions>(CountOptions::ONLY_VALID);
379380
ASSERT_OK(
380381
Declaration::Sequence(
381382
{
@@ -390,7 +391,7 @@ TEST(ExecPlan, ToString) {
390391
}}},
391392
{"aggregate",
392393
AggregateNodeOptions{
393-
/*aggregates=*/{{"hash_sum", nullptr}, {"hash_count", &options}},
394+
/*aggregates=*/{{"hash_sum", nullptr}, {"hash_count", options}},
394395
/*targets=*/{"multiply(i32, 2)", "multiply(i32, 2)"},
395396
/*names=*/{"sum(multiply(i32, 2))", "count(multiply(i32, 2))"},
396397
/*keys=*/{"bool"}}},
@@ -428,17 +429,17 @@ custom_sink_label:OrderBySinkNode{by={sort_keys=[FieldRef.Name(sum(multiply(i32,
428429
rhs.label = "rhs";
429430
union_node.inputs.emplace_back(lhs);
430431
union_node.inputs.emplace_back(rhs);
431-
ASSERT_OK(
432-
Declaration::Sequence(
433-
{
434-
union_node,
435-
{"aggregate", AggregateNodeOptions{/*aggregates=*/{{"count", &options}},
436-
/*targets=*/{"i32"},
437-
/*names=*/{"count(i32)"},
438-
/*keys=*/{}}},
439-
{"sink", SinkNodeOptions{&sink_gen}},
440-
})
441-
.AddToPlan(plan.get()));
432+
ASSERT_OK(Declaration::Sequence(
433+
{
434+
union_node,
435+
{"aggregate",
436+
AggregateNodeOptions{/*aggregates=*/{{"count", std::move(options)}},
437+
/*targets=*/{"i32"},
438+
/*names=*/{"count(i32)"},
439+
/*keys=*/{}}},
440+
{"sink", SinkNodeOptions{&sink_gen}},
441+
})
442+
.AddToPlan(plan.get()));
442443
EXPECT_EQ(plan->ToString(), R"a(ExecPlan with 5 nodes:
443444
:SinkNode{}
444445
:ScalarAggregateNode{aggregates=[
@@ -1155,7 +1156,7 @@ TEST(ExecPlanExecution, AggregationPreservesOptions) {
11551156
basic_data.gen(/*parallel=*/false,
11561157
/*slow=*/false)}},
11571158
{"aggregate",
1158-
AggregateNodeOptions{/*aggregates=*/{{"tdigest", options.get()}},
1159+
AggregateNodeOptions{/*aggregates=*/{{"tdigest", options}},
11591160
/*targets=*/{"i32"},
11601161
/*names=*/{"tdigest(i32)"}}},
11611162
{"sink", SinkNodeOptions{&sink_gen}},
@@ -1182,7 +1183,7 @@ TEST(ExecPlanExecution, AggregationPreservesOptions) {
11821183
{"source", SourceNodeOptions{data.schema, data.gen(/*parallel=*/false,
11831184
/*slow=*/false)}},
11841185
{"aggregate",
1185-
AggregateNodeOptions{/*aggregates=*/{{"hash_count", options.get()}},
1186+
AggregateNodeOptions{/*aggregates=*/{{"hash_count", options}},
11861187
/*targets=*/{"i32"},
11871188
/*names=*/{"count(i32)"},
11881189
/*keys=*/{"str"}}},

cpp/src/arrow/compute/exec/tpch_benchmark.cc

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,12 +74,13 @@ std::shared_ptr<ExecPlan> Plan_Q1(AsyncGenerator<util::optional<ExecBatch>>* sin
7474
"sum_charge", "avg_qty", "avg_price", "avg_disc"};
7575
ProjectNodeOptions project_opts(std::move(projection_list), std::move(project_names));
7676

77-
ScalarAggregateOptions sum_opts = ScalarAggregateOptions::Defaults();
78-
CountOptions count_opts(CountOptions::CountMode::ALL);
77+
auto sum_opts =
78+
std::make_shared<ScalarAggregateOptions>(ScalarAggregateOptions::Defaults());
79+
auto count_opts = std::make_shared<CountOptions>(CountOptions::CountMode::ALL);
7980
std::vector<arrow::compute::internal::Aggregate> aggs = {
80-
{"hash_sum", &sum_opts}, {"hash_sum", &sum_opts}, {"hash_sum", &sum_opts},
81-
{"hash_sum", &sum_opts}, {"hash_mean", &sum_opts}, {"hash_mean", &sum_opts},
82-
{"hash_mean", &sum_opts}, {"hash_count", &count_opts}};
81+
{"hash_sum", sum_opts}, {"hash_sum", sum_opts}, {"hash_sum", sum_opts},
82+
{"hash_sum", sum_opts}, {"hash_mean", sum_opts}, {"hash_mean", sum_opts},
83+
{"hash_mean", sum_opts}, {"hash_count", count_opts}};
8384

8485
std::vector<FieldRef> to_aggregate = {"sum_qty", "sum_base_price", "sum_disc_price",
8586
"sum_charge", "avg_qty", "avg_price",

0 commit comments

Comments
 (0)