forked from apache/arrow
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathserde.h
More file actions
304 lines (278 loc) · 14.7 KB
/
Copy pathserde.h
File metadata and controls
304 lines (278 loc) · 14.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This API is EXPERIMENTAL.
#pragma once
#include <functional>
#include <memory>
#include <string>
#include <string_view>
#include <vector>
#include "arrow/compute/type_fwd.h"
#include "arrow/dataset/type_fwd.h"
#include "arrow/engine/substrait/options.h"
#include "arrow/engine/substrait/type_fwd.h"
#include "arrow/engine/substrait/visibility.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"
namespace arrow {
namespace engine {
/// \brief Serialize an Acero Plan to a binary protobuf Substrait message
///
/// \param[in] declaration the Acero declaration to serialize.
/// This declaration is the sink relation of the Acero plan.
/// \param[in,out] ext_set the extension mapping to use; may be updated to add
/// \param[in] conversion_options options to control how the conversion is done
///
/// \return a buffer containing the protobuf serialization of the Acero relation
ARROW_ENGINE_EXPORT
Result<std::shared_ptr<Buffer>> SerializePlan(
const compute::Declaration& declaration, ExtensionSet* ext_set,
const ConversionOptions& conversion_options = {});
/// Factory function type for generating the node that consumes the batches produced by
/// each toplevel Substrait relation when deserializing a Substrait Plan.
using ConsumerFactory = std::function<std::shared_ptr<compute::SinkNodeConsumer>()>;
/// \brief Deserializes a Substrait Plan message to a list of ExecNode declarations
///
/// The output of each top-level Substrait relation will be sent to a caller supplied
/// consumer function provided by consumer_factory
///
/// \param[in] buf a buffer containing the protobuf serialization of a Substrait Plan
/// message
/// \param[in] consumer_factory factory function for generating the node that consumes
/// the batches produced by each toplevel Substrait relation
/// \param[in] registry an extension-id-registry to use, or null for the default one.
/// \param[out] ext_set_out if non-null, the extension mapping used by the Substrait
/// Plan is returned here.
/// \param[in] conversion_options options to control how the conversion is to be done.
/// \return a vector of ExecNode declarations, one for each toplevel relation in the
/// Substrait Plan
ARROW_ENGINE_EXPORT Result<std::vector<compute::Declaration>> DeserializePlans(
const Buffer& buf, const ConsumerFactory& consumer_factory,
const ExtensionIdRegistry* registry = NULLPTR, ExtensionSet* ext_set_out = NULLPTR,
const ConversionOptions& conversion_options = {});
/// \brief Deserializes a single-relation Substrait Plan message to an execution plan
///
/// The output of each top-level Substrait relation will be sent to a caller supplied
/// consumer function provided by consumer_factory
///
/// \param[in] buf a buffer containing the protobuf serialization of a Substrait Plan
/// message
/// \param[in] consumer node that consumes the batches produced by each toplevel Substrait
/// relation
/// \param[in] registry an extension-id-registry to use, or null for the default one.
/// \param[out] ext_set_out if non-null, the extension mapping used by the Substrait
/// \param[in] conversion_options options to control how the conversion is to be done.
/// Plan is returned here.
/// \return an ExecNode corresponding to the single toplevel relation in the Substrait
/// Plan
ARROW_ENGINE_EXPORT Result<std::shared_ptr<compute::ExecPlan>> DeserializePlan(
const Buffer& buf, const std::shared_ptr<compute::SinkNodeConsumer>& consumer,
const ExtensionIdRegistry* registry = NULLPTR, ExtensionSet* ext_set_out = NULLPTR,
const ConversionOptions& conversion_options = {});
/// Factory function type for generating the write options of a node consuming the batches
/// produced by each toplevel Substrait relation when deserializing a Substrait Plan.
using WriteOptionsFactory = std::function<std::shared_ptr<dataset::WriteNodeOptions>()>;
/// \brief Deserializes a Substrait Plan message to a list of ExecNode declarations
///
/// The output of each top-level Substrait relation will be written to a filesystem.
/// `write_options_factory` can be used to control write behavior.
///
/// \param[in] buf a buffer containing the protobuf serialization of a Substrait Plan
/// message
/// \param[in] write_options_factory factory function for generating the write options of
/// a node consuming the batches produced by each toplevel Substrait relation
/// \param[in] registry an extension-id-registry to use, or null for the default one.
/// \param[out] ext_set_out if non-null, the extension mapping used by the Substrait
/// Plan is returned here.
/// \param[in] conversion_options options to control how the conversion is to be done.
/// \return a vector of ExecNode declarations, one for each toplevel relation in the
/// Substrait Plan
ARROW_ENGINE_EXPORT Result<std::vector<compute::Declaration>> DeserializePlans(
const Buffer& buf, const WriteOptionsFactory& write_options_factory,
const ExtensionIdRegistry* registry = NULLPTR, ExtensionSet* ext_set_out = NULLPTR,
const ConversionOptions& conversion_options = {});
/// \brief Deserializes a single-relation Substrait Plan message to an execution plan
///
/// The output of the single Substrait relation will be written to a filesystem.
/// `write_options_factory` can be used to control write behavior.
///
/// \param[in] buf a buffer containing the protobuf serialization of a Substrait Plan
/// message
/// \param[in] write_options write options of a node consuming the batches produced by
/// each toplevel Substrait relation
/// \param[in] registry an extension-id-registry to use, or null for the default one.
/// \param[out] ext_set_out if non-null, the extension mapping used by the Substrait
/// Plan is returned here.
/// \param[in] conversion_options options to control how the conversion is to be done.
/// \return a vector of ExecNode declarations, one for each toplevel relation in the
/// Substrait Plan
ARROW_ENGINE_EXPORT Result<std::shared_ptr<compute::ExecPlan>> DeserializePlan(
const Buffer& buf, const std::shared_ptr<dataset::WriteNodeOptions>& write_options,
const ExtensionIdRegistry* registry = NULLPTR, ExtensionSet* ext_set_out = NULLPTR,
const ConversionOptions& conversion_options = {});
/// \brief Deserializes a Substrait Plan message to a Declaration
///
/// The plan will not contain any sink nodes and will be suitable for use in any
/// of the arrow::compute::DeclarationToXyz methods.
///
/// \param[in] buf a buffer containing the protobuf serialization of a Substrait Plan
/// message
/// \param[in] registry an extension-id-registry to use, or null for the default one.
/// \param[out] ext_set_out if non-null, the extension mapping used by the Substrait
/// Plan is returned here.
/// \param[in] conversion_options options to control how the conversion is to be done.
/// \return A declaration representing the Substrait plan
ARROW_ENGINE_EXPORT Result<compute::Declaration> DeserializePlan(
const Buffer& buf, const ExtensionIdRegistry* registry = NULLPTR,
ExtensionSet* ext_set_out = NULLPTR,
const ConversionOptions& conversion_options = {});
/// \brief Deserializes a Substrait Type message to the corresponding Arrow type
///
/// \param[in] buf a buffer containing the protobuf serialization of a Substrait Type
/// message
/// \param[in] ext_set the extension mapping to use, normally provided by the
/// surrounding Plan message
/// \param[in] conversion_options options to control how the conversion is to be done.
/// \return the corresponding Arrow data type
ARROW_ENGINE_EXPORT
Result<std::shared_ptr<DataType>> DeserializeType(
const Buffer& buf, const ExtensionSet& ext_set,
const ConversionOptions& conversion_options = {});
/// \brief Serializes an Arrow type to a Substrait Type message
///
/// \param[in] type the Arrow data type to serialize
/// \param[in,out] ext_set the extension mapping to use; may be updated to add a
/// mapping for the given type
/// \param[in] conversion_options options to control how the conversion is to be done.
/// \return a buffer containing the protobuf serialization of the corresponding Substrait
/// Type message
ARROW_ENGINE_EXPORT
Result<std::shared_ptr<Buffer>> SerializeType(
const DataType& type, ExtensionSet* ext_set,
const ConversionOptions& conversion_options = {});
/// \brief Deserializes a Substrait NamedStruct message to an Arrow schema
///
/// \param[in] buf a buffer containing the protobuf serialization of a Substrait
/// NamedStruct message
/// \param[in] ext_set the extension mapping to use, normally provided by the
/// surrounding Plan message
/// \param[in] conversion_options options to control how the conversion is to be done.
/// \return the corresponding Arrow schema
ARROW_ENGINE_EXPORT
Result<std::shared_ptr<Schema>> DeserializeSchema(
const Buffer& buf, const ExtensionSet& ext_set,
const ConversionOptions& conversion_options = {});
/// \brief Serializes an Arrow schema to a Substrait NamedStruct message
///
/// \param[in] schema the Arrow schema to serialize
/// \param[in,out] ext_set the extension mapping to use; may be updated to add
/// mappings for the types used in the schema
/// \param[in] conversion_options options to control how the conversion is to be done.
/// \return a buffer containing the protobuf serialization of the corresponding Substrait
/// NamedStruct message
ARROW_ENGINE_EXPORT
Result<std::shared_ptr<Buffer>> SerializeSchema(
const Schema& schema, ExtensionSet* ext_set,
const ConversionOptions& conversion_options = {});
/// \brief Deserializes a Substrait Expression message to a compute expression
///
/// \param[in] buf a buffer containing the protobuf serialization of a Substrait
/// Expression message
/// \param[in] ext_set the extension mapping to use, normally provided by the
/// surrounding Plan message
/// \param[in] conversion_options options to control how the conversion is to be done.
/// \return the corresponding Arrow compute expression
ARROW_ENGINE_EXPORT
Result<compute::Expression> DeserializeExpression(
const Buffer& buf, const ExtensionSet& ext_set,
const ConversionOptions& conversion_options = {});
/// \brief Serializes an Arrow compute expression to a Substrait Expression message
///
/// \param[in] expr the Arrow compute expression to serialize
/// \param[in,out] ext_set the extension mapping to use; may be updated to add
/// mappings for the types used in the expression
/// \param[in] conversion_options options to control how the conversion is to be done.
/// \return a buffer containing the protobuf serialization of the corresponding Substrait
/// Expression message
ARROW_ENGINE_EXPORT
Result<std::shared_ptr<Buffer>> SerializeExpression(
const compute::Expression& expr, ExtensionSet* ext_set,
const ConversionOptions& conversion_options = {});
/// \brief Serialize an Acero Declaration to a binary protobuf Substrait message
///
/// \param[in] declaration the Acero declaration to serialize
/// \param[in,out] ext_set the extension mapping to use; may be updated to add
/// \param[in] conversion_options options to control how the conversion is done
///
/// \return a buffer containing the protobuf serialization of the Acero relation
ARROW_ENGINE_EXPORT Result<std::shared_ptr<Buffer>> SerializeRelation(
const compute::Declaration& declaration, ExtensionSet* ext_set,
const ConversionOptions& conversion_options = {});
/// \brief Deserializes a Substrait Rel (relation) message to an ExecNode declaration
///
/// \param[in] buf a buffer containing the protobuf serialization of a Substrait
/// Rel message
/// \param[in] ext_set the extension mapping to use, normally provided by the
/// surrounding Plan message
/// \param[in] conversion_options options to control how the conversion is to be done.
/// \return the corresponding ExecNode declaration
ARROW_ENGINE_EXPORT Result<compute::Declaration> DeserializeRelation(
const Buffer& buf, const ExtensionSet& ext_set,
const ConversionOptions& conversion_options = {});
namespace internal {
/// \brief Checks whether two protobuf serializations of a particular Substrait message
/// type are equivalent
///
/// Note that a binary comparison of the two buffers is insufficient. One reason for this
/// is that the fields of a message can be specified in any order in the serialization.
///
/// \param[in] message_name the name of the Substrait message type to check
/// \param[in] l_buf buffer containing the first protobuf serialization to compare
/// \param[in] r_buf buffer containing the second protobuf serialization to compare
/// \return success if equivalent, failure if not
ARROW_ENGINE_EXPORT
Status CheckMessagesEquivalent(std::string_view message_name, const Buffer& l_buf,
const Buffer& r_buf);
/// \brief Utility function to convert a JSON serialization of a Substrait message to
/// its binary serialization
///
/// \param[in] type_name the name of the Substrait message type to convert
/// \param[in] json the JSON string to convert
/// \param[in] ignore_unknown_fields if true then unknown fields will be ignored and
/// will not cause an error
///
/// This should generally be true to allow consumption of plans from newer
/// producers but setting to false can be useful if you are testing
/// conformance to a specific Substrait version
/// \return a buffer filled with the binary protobuf serialization of message
ARROW_ENGINE_EXPORT
Result<std::shared_ptr<Buffer>> SubstraitFromJSON(std::string_view type_name,
std::string_view json,
bool ignore_unknown_fields = true);
/// \brief Utility function to convert a binary protobuf serialization of a Substrait
/// message to JSON
///
/// \param[in] type_name the name of the Substrait message type to convert
/// \param[in] buf the buffer containing the binary protobuf serialization of the message
/// \return a JSON string representing the message
ARROW_ENGINE_EXPORT
Result<std::string> SubstraitToJSON(std::string_view type_name, const Buffer& buf);
} // namespace internal
} // namespace engine
} // namespace arrow