//
// Copyright 2020 The Feast Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
syntax = "proto3";
package feast.core;
option go_package = "github.com/feast-dev/feast/go/protos/feast/core";
option java_outer_classname = "DataSourceProto";
option java_package = "feast.proto.core";
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";
import "feast/core/DataFormat.proto";
import "feast/types/Value.proto";
import "feast/core/Feature.proto";
// Defines a Data Source that can be used to source Feature data
// Next available id: 28
message DataSource {
  // Field indexes should *not* be reused. Not sure if fields 6-10 were used previously or not,
  // but they are going to be reserved for backwards compatibility.
  reserved 6 to 10;

  // Type of Data Source.
  // Next available id: 13
  enum SourceType {
    INVALID = 0;
    BATCH_FILE = 1;
    BATCH_SNOWFLAKE = 8;
    BATCH_BIGQUERY = 2;
    BATCH_REDSHIFT = 5;
    STREAM_KAFKA = 3;
    STREAM_KINESIS = 4;
    CUSTOM_SOURCE = 6;
    REQUEST_SOURCE = 7;
    PUSH_SOURCE = 9;
    BATCH_TRINO = 10;
    BATCH_SPARK = 11;
    BATCH_ATHENA = 12;
  }
  // Unique name of data source within the project
  string name = 20;

  // Name of Feast project that this data source belongs to.
  string project = 21;

  string description = 23;
  map<string, string> tags = 24;
  string owner = 25;

  SourceType type = 1;

  // Defines a mapping between fields in the sourced data
  // and fields in the parent FeatureTable.
  map<string, string> field_mapping = 2;

  // Must specify event timestamp column name
  string timestamp_field = 3;

  // (Optional) Specify partition column
  // useful for file sources
  string date_partition_column = 4;

  // Must specify creation timestamp column name
  string created_timestamp_column = 5;

  // This is an internal field that represents the Python class of the data source object that
  // this proto object represents. It should be set by Feast, and not by users.
  // The field is used primarily by custom data sources and is mandatory for them to set. Feast may
  // set it for first-party sources as well.
  string data_source_class_type = 17;

  // Optional batch source for streaming sources, used for historical feature retrieval and materialization.
  DataSource batch_source = 26;

  SourceMeta meta = 50;
  message SourceMeta {
    google.protobuf.Timestamp earliestEventTimestamp = 1;
    google.protobuf.Timestamp latestEventTimestamp = 2;
    google.protobuf.Timestamp created_timestamp = 3;
    google.protobuf.Timestamp last_updated_timestamp = 4;
  }
  // Defines options for DataSource that sources features from a file
  message FileOptions {
    FileFormat file_format = 1;

    // Target URL of the file to retrieve and source features from:
    //   s3://path/to/file for AWS S3 storage
    //   gs://path/to/file for GCP GCS storage
    //   file:///path/to/file for local storage
    string uri = 2;

    // Override the AWS S3 storage endpoint with a custom S3 endpoint
    string s3_endpoint_override = 3;
  }
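  // Illustrative example (not part of the schema): FileOptions for a local Parquet file,
  // expressed as textproto. The parquet_format variant is assumed from feast/core/DataFormat.proto;
  // all values are hypothetical.
  //
  //   file_options {
  //     file_format { parquet_format {} }
  //     uri: "file:///data/driver_stats.parquet"
  //   }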
  // Defines options for DataSource that sources features from a BigQuery Query
  message BigQueryOptions {
    // Full table reference in the form of [project:dataset.table]
    string table = 1;

    // SQL query that returns a table containing feature data. Must contain an event_timestamp column, and respective
    // entity columns
    string query = 2;
  }
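  // Illustrative example (not part of the schema): BigQueryOptions usually sets either `table`
  // or `query`, not both. The reference below is hypothetical.
  //
  //   bigquery_options {
  //     table: "my_project:feast_dataset.driver_hourly_stats"
  //   }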
  // Defines options for DataSource that sources features from a Trino Query
  message TrinoOptions {
    // Full table reference in the form of [project:dataset.table]
    string table = 1;

    // SQL query that returns a table containing feature data. Must contain an event_timestamp column, and respective
    // entity columns
    string query = 2;
  }
  // Defines options for DataSource that sources features from Kafka messages.
  // Each message should be a Protobuf that can be decoded with the generated
  // Java Protobuf class at the given class path
  message KafkaOptions {
    // Comma-separated list of Kafka bootstrap servers, each in host[:port] form.
    // Used for feature tables without a defined source.
    string kafka_bootstrap_servers = 1;

    // Kafka topic to collect feature data from.
    string topic = 2;

    // Defines the stream data format encoding feature/entity data in Kafka messages.
    StreamFormat message_format = 3;

    // Watermark delay threshold for stream data
    google.protobuf.Duration watermark_delay_threshold = 4;
  }
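  // Illustrative example (not part of the schema): KafkaOptions for a stream source, expressed
  // as textproto. Broker and topic names are hypothetical, and the avro_format variant is
  // assumed from feast/core/DataFormat.proto.
  //
  //   kafka_options {
  //     kafka_bootstrap_servers: "broker1:9092,broker2:9092"
  //     topic: "driver_stats"
  //     message_format { avro_format { schema_json: "..." } }
  //     watermark_delay_threshold { seconds: 300 }
  //   }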
  // Defines options for DataSource that sources features from Kinesis records.
  // Each record should be a Protobuf that can be decoded with the generated
  // Java Protobuf class at the given class path
  message KinesisOptions {
    // AWS region of the Kinesis stream
    string region = 1;

    // Name of the Kinesis stream to obtain feature data from.
    string stream_name = 2;

    // Defines the data format encoding the feature/entity data in Kinesis records.
    // Kinesis Data Sources support Avro and Proto as data formats.
    StreamFormat record_format = 3;
  }
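  // Illustrative example (not part of the schema): KinesisOptions expressed as textproto. The
  // stream name and class path are hypothetical, and the proto_format variant is assumed from
  // feast/core/DataFormat.proto.
  //
  //   kinesis_options {
  //     region: "us-west-2"
  //     stream_name: "driver-stats-stream"
  //     record_format { proto_format { class_path: "com.example.DriverStats" } }
  //   }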
  // Defines options for DataSource that sources features from a Redshift Query
  message RedshiftOptions {
    // Redshift table name
    string table = 1;

    // SQL query that returns a table containing feature data. Must contain an event_timestamp column, and respective
    // entity columns
    string query = 2;

    // Redshift schema name
    string schema = 3;

    // Redshift database name
    string database = 4;
  }
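  // Illustrative example (not part of the schema): RedshiftOptions pointing at a table rather
  // than a query; all names are hypothetical.
  //
  //   redshift_options {
  //     table: "driver_hourly_stats"
  //     schema: "public"
  //     database: "feast"
  //   }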
  // Defines options for DataSource that sources features from an Athena Query
  message AthenaOptions {
    // Athena table name
    string table = 1;

    // SQL query that returns a table containing feature data. Must contain an event_timestamp column, and respective
    // entity columns
    string query = 2;

    // Athena database name
    string database = 3;

    // Athena data source (catalog) name
    string data_source = 4;
  }
  // Defines options for DataSource that sources features from a Snowflake Query
  message SnowflakeOptions {
    reserved 5; // Snowflake warehouse name

    // Snowflake table name
    string table = 1;

    // SQL query that returns a table containing feature data. Must contain an event_timestamp column, and respective
    // entity columns
    string query = 2;

    // Snowflake schema name
    string schema = 3;

    // Snowflake database name
    string database = 4;
  }
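  // Illustrative example (not part of the schema): SnowflakeOptions identifying a table by
  // database, schema, and table name; all names are hypothetical.
  //
  //   snowflake_options {
  //     database: "FEAST"
  //     schema: "PUBLIC"
  //     table: "DRIVER_HOURLY_STATS"
  //   }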
  // Defines options for DataSource that sources features from a Spark table/query
  message SparkOptions {
    // Table name
    string table = 1;

    // Spark SQL query that returns the table; this is an alternative to `table`
    string query = 2;

    // Path from which Spark can read the table; this is an alternative to `table`
    string path = 3;

    // Format of files at `path` (e.g. parquet, avro, etc)
    string file_format = 4;

    // Date format of the date partition column (e.g. %Y-%m-%d)
    string date_partition_column_format = 5;

    // Table format (e.g. iceberg, delta, hudi)
    TableFormat table_format = 6;
  }
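  // Illustrative example (not part of the schema): SparkOptions reading Parquet files from a
  // path instead of a named table; the path and formats are hypothetical.
  //
  //   spark_options {
  //     path: "s3a://bucket/driver_stats/"
  //     file_format: "parquet"
  //     date_partition_column_format: "%Y-%m-%d"
  //   }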
  // Defines configuration for custom third-party data sources.
  message CustomSourceOptions {
    // Serialized configuration information for the data source. The implementer of the custom data source is
    // responsible for serializing and deserializing data from bytes
    bytes configuration = 1;
  }
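  // Illustrative example (not part of the schema): CustomSourceOptions carries an opaque payload
  // chosen by the custom source implementer, e.g. serialized JSON; the value below is hypothetical.
  //
  //   custom_options {
  //     configuration: "{\"host\": \"example.internal\", \"table\": \"driver_stats\"}"
  //   }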
  // Defines options for DataSource that sources features from request data
  message RequestDataOptions {
    reserved 1;

    // Mapping of feature name to type
    map<string, feast.types.ValueType.Enum> deprecated_schema = 2;

    repeated FeatureSpecV2 schema = 3;
  }
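  // Illustrative example (not part of the schema): RequestDataOptions declaring a single
  // request-time field via `schema`, assuming FeatureSpecV2 (feast/core/Feature.proto) exposes
  // `name` and `value_type`; the field name is hypothetical.
  //
  //   request_data_options {
  //     schema { name: "val_to_add" value_type: INT64 }
  //   }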
  // Defines options for DataSource that supports pushing data to it. This allows data to be pushed to
  // the online store on-demand, such as by stream consumers.
  message PushOptions {
    reserved 1;
  }
  // DataSource options.
  oneof options {
    FileOptions file_options = 11;
    BigQueryOptions bigquery_options = 12;
    KafkaOptions kafka_options = 13;
    KinesisOptions kinesis_options = 14;
    RedshiftOptions redshift_options = 15;
    RequestDataOptions request_data_options = 18;
    CustomSourceOptions custom_options = 16;
    SnowflakeOptions snowflake_options = 19;
    PushOptions push_options = 22;
    SparkOptions spark_options = 27;
    TrinoOptions trino_options = 30;
    AthenaOptions athena_options = 35;
  }
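  // Illustrative example (not part of the schema): a complete file-backed DataSource expressed
  // as textproto, with hypothetical names; at most one member of the `options` oneof may be set.
  //
  //   name: "driver_hourly_stats"
  //   project: "driver_ranking"
  //   type: BATCH_FILE
  //   timestamp_field: "event_timestamp"
  //   created_timestamp_column: "created"
  //   file_options {
  //     uri: "file:///data/driver_stats.parquet"
  //   }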
}
message DataSourceList {
  repeated DataSource datasources = 1;
}
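// Illustrative example (not part of the schema): a DataSourceList wrapping one entry, expressed
// as textproto with a hypothetical source name.
//
//   datasources {
//     name: "driver_hourly_stats"
//     type: BATCH_FILE
//   }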