Skip to content

Commit 7d4e93d

Browse files
vibhatha and westonpace
authored and committed
ARROW-15515: [C++] Update ExecPlan example code and documentation with new options
This PR includes a documentation update for streaming execution engine and a `table_sink` example. Closes apache#12689 from vibhatha/arrow-15515 Authored-by: Vibhatha Abeykoon <vibhatha@gmail.com> Signed-off-by: Weston Pace <weston.pace@gmail.com>
1 parent fe11431 commit 7d4e93d

3 files changed

Lines changed: 68 additions & 3 deletions

File tree

cpp/examples/arrow/execution_plan_documentation_examples.cc

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -855,6 +855,46 @@ arrow::Status SourceUnionSinkExample(cp::ExecContext& exec_context) {
855855

856856
// (Doc section: Union Example)
857857

858+
// (Doc section: Table Sink Example)
859+
860+
/// \brief An example showing a table sink node
861+
/// \param exec_context The execution context to run the plan in
862+
///
863+
/// TableSink Example
864+
/// This example shows how a table_sink can be used
865+
/// in an execution plan. This includes a source node
866+
/// receiving data as batches and the table sink node
867+
/// which emits the output as a table.
868+
arrow::Status TableSinkExample(cp::ExecContext& exec_context) {
869+
ARROW_ASSIGN_OR_RAISE(std::shared_ptr<cp::ExecPlan> plan,
870+
cp::ExecPlan::Make(&exec_context));
871+
872+
ARROW_ASSIGN_OR_RAISE(auto basic_data, MakeBasicBatches());
873+
874+
auto source_node_options = cp::SourceNodeOptions{basic_data.schema, basic_data.gen()};
875+
876+
ARROW_ASSIGN_OR_RAISE(cp::ExecNode * source,
877+
cp::MakeExecNode("source", plan.get(), {}, source_node_options));
878+
879+
std::shared_ptr<arrow::Table> output_table;
880+
auto table_sink_options = cp::TableSinkNodeOptions{&output_table};
881+
882+
ARROW_RETURN_NOT_OK(
883+
cp::MakeExecNode("table_sink", plan.get(), {source}, table_sink_options));
884+
// validate the ExecPlan
885+
ARROW_RETURN_NOT_OK(plan->Validate());
886+
std::cout << "ExecPlan created : " << plan->ToString() << std::endl;
887+
// start the ExecPlan
888+
ARROW_RETURN_NOT_OK(plan->StartProducing());
889+
890+
// Wait for the plan to finish
891+
auto finished = plan->finished();
892+
RETURN_NOT_OK(finished.status());
893+
std::cout << "Results : " << output_table->ToString() << std::endl;
894+
return arrow::Status::OK();
895+
}
896+
// (Doc section: Table Sink Example)
897+
858898
enum ExampleMode {
859899
SOURCE_SINK = 0,
860900
TABLE_SOURCE_SINK = 1,
@@ -869,6 +909,7 @@ enum ExampleMode {
869909
KSELECT = 10,
870910
WRITE = 11,
871911
UNION = 12,
912+
TABLE_SOURCE_TABLE_SINK = 13
872913
};
873914

874915
int main(int argc, char** argv) {
@@ -937,6 +978,10 @@ int main(int argc, char** argv) {
937978
PrintBlock("Union Example");
938979
status = SourceUnionSinkExample(exec_context);
939980
break;
981+
case TABLE_SOURCE_TABLE_SINK:
982+
PrintBlock("TableSink Example");
983+
status = TableSinkExample(exec_context);
984+
break;
940985
default:
941986
break;
942987
}

cpp/src/arrow/compute/exec/options.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -305,12 +305,11 @@ class ARROW_EXPORT SelectKSinkNodeOptions : public SinkNodeOptions {
305305
/// SelectK options
306306
SelectKOptions select_k_options;
307307
};
308-
309308
/// @}
310309

311-
/// \brief Adapt an Table as a sink node
310+
/// \brief Adapt a Table as a sink node
312311
///
313-
/// obtains the output of a execution plan to
312+
/// obtains the output of an execution plan to
314313
/// a table pointer.
315314
class ARROW_EXPORT TableSinkNodeOptions : public ExecNodeOptions {
316315
public:

docs/source/cpp/streaming_execution.rst

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,8 @@ This is the list of operations associated with the execution plan:
346346
- :class:`arrow::dataset::WriteNodeOptions`
347347
* - ``union``
348348
- N/A
349+
* - ``table_sink``
350+
- :class:`arrow::compute::TableSinkNodeOptions`
349351

350352
.. _stream_execution_source_docs:
351353

@@ -647,6 +649,25 @@ SelectK example:
647649

648650
.. _stream_execution_scan_docs:
649651

652+
.. _stream_execution_table_sink_docs:
653+
654+
``table_sink``
655+
----------------
656+
657+
The ``table_sink`` node provides the ability to receive the output as an in-memory table.
658+
This is simpler to use than the other sink nodes provided by the streaming execution engine,
659+
but it only makes sense when the output fits comfortably in memory.
660+
The node is created using :class:`arrow::compute::TableSinkNodeOptions`.
661+
662+
Example of using ``table_sink``:
663+
664+
.. literalinclude:: ../../../cpp/examples/arrow/execution_plan_documentation_examples.cc
665+
:language: cpp
666+
:start-after: (Doc section: Table Sink Example)
667+
:end-before: (Doc section: Table Sink Example)
668+
:linenos:
669+
:lineno-match:
670+
650671
``scan``
651672
---------
652673

0 commit comments

Comments
 (0)