Skip to content

Commit 4d37481

Browse files
Adds support for arrays in snowflake
1 parent 774ed33 commit 4d37481

File tree

5 files changed

+256
-3
lines changed

5 files changed

+256
-3
lines changed

sdk/python/feast/infra/offline_stores/snowflake.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import contextlib
2+
import json
23
import os
34
import uuid
45
import warnings
@@ -51,6 +52,7 @@
5152
)
5253
from feast.repo_config import FeastConfigBaseModel, RepoConfig
5354
from feast.saved_dataset import SavedDatasetStorage
55+
from feast.types import Array, Float32, Float64, Int32, Int64, String, UnixTimestamp
5456
from feast.usage import log_exceptions_and_usage
5557

5658
try:
@@ -320,6 +322,7 @@ def query_generator() -> Iterator[str]:
320322
on_demand_feature_views=OnDemandFeatureView.get_requested_odfvs(
321323
feature_refs, project, registry
322324
),
325+
feature_views=feature_views,
323326
metadata=RetrievalMetadata(
324327
features=feature_refs,
325328
keys=list(entity_schema.keys() - {entity_df_event_timestamp_col}),
@@ -398,9 +401,12 @@ def __init__(
398401
config: RepoConfig,
399402
full_feature_names: bool,
400403
on_demand_feature_views: Optional[List[OnDemandFeatureView]] = None,
404+
feature_views: Optional[List[FeatureView]] = None,
401405
metadata: Optional[RetrievalMetadata] = None,
402406
):
403407

408+
if feature_views is None:
409+
feature_views = []
404410
if not isinstance(query, str):
405411
self._query_generator = query
406412
else:
@@ -416,6 +422,7 @@ def query_generator() -> Iterator[str]:
416422
self.config = config
417423
self._full_feature_names = full_feature_names
418424
self._on_demand_feature_views = on_demand_feature_views or []
425+
self._feature_views = feature_views
419426
self._metadata = metadata
420427
self.export_path: Optional[str]
421428
if self.config.offline_store.blob_export_location:
@@ -436,6 +443,18 @@ def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame:
436443
self.snowflake_conn, self.to_sql()
437444
).fetch_pandas_all()
438445

446+
for feature_view in self._feature_views:
447+
for feature in feature_view.features:
448+
if feature.dtype in [
449+
Array(String),
450+
Array(Int32),
451+
Array(Int64),
452+
Array(UnixTimestamp),
453+
Array(Float64),
454+
Array(Float32),
455+
]:
456+
df[feature.name] = df[feature.name].apply(lambda x: json.loads(x))
457+
439458
return df
440459

441460
def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table:

sdk/python/feast/infra/offline_stores/snowflake_source.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -279,12 +279,12 @@ def get_table_column_names_and_types(
279279
else:
280280
row["snowflake_type"] = "NUMBERwSCALE"
281281

282-
elif row["type_code"] in [5, 9, 10, 12]:
282+
elif row["type_code"] in [5, 9, 12]:
283283
error = snowflake_unsupported_map[row["type_code"]]
284284
raise NotImplementedError(
285285
f"The following Snowflake Data Type is not supported: {error}"
286286
)
287-
elif row["type_code"] in [1, 2, 3, 4, 6, 7, 8, 11, 13]:
287+
elif row["type_code"] in [1, 2, 3, 4, 6, 7, 8, 10, 11, 13]:
288288
row["snowflake_type"] = snowflake_type_code_map[row["type_code"]]
289289
else:
290290
raise NotImplementedError(
@@ -305,14 +305,14 @@ def get_table_column_names_and_types(
305305
6: "TIMESTAMP_LTZ",
306306
7: "TIMESTAMP_TZ",
307307
8: "TIMESTAMP_NTZ",
308+
10: "ARRAY",
308309
11: "BINARY",
309310
13: "BOOLEAN",
310311
}
311312

312313
snowflake_unsupported_map = {
313314
5: "VARIANT -- Try converting to VARCHAR",
314315
9: "OBJECT -- Try converting to VARCHAR",
315-
10: "ARRAY -- Try converting to VARCHAR",
316316
12: "TIME -- Try converting to VARCHAR",
317317
}
318318

sdk/python/feast/infra/utils/snowflake/snowpark/snowflake_python_udfs_creation.sql

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,62 @@ CREATE FUNCTION IF NOT EXISTS feast_PROJECT_NAME_snowflake_varchar_to_string_pro
1414
HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_varchar_to_string_proto'
1515
IMPORTS = ('@STAGE_HOLDER/feast.zip');
1616

17+
CREATE FUNCTION IF NOT EXISTS feast_PROJECT_NAME_snowflake_array_bytes_to_list_bytes_proto(df ARRAY)
18+
RETURNS BINARY
19+
LANGUAGE PYTHON
20+
RUNTIME_VERSION = '3.8'
21+
PACKAGES = ('protobuf', 'pandas')
22+
HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_bytes_to_list_bytes_proto'
23+
IMPORTS = ('@STAGE_HOLDER/feast.zip');
24+
25+
CREATE FUNCTION IF NOT EXISTS feast_PROJECT_NAME_snowflake_array_varchar_to_list_string_proto(df ARRAY)
26+
RETURNS BINARY
27+
LANGUAGE PYTHON
28+
RUNTIME_VERSION = '3.8'
29+
PACKAGES = ('protobuf', 'pandas')
30+
HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_varchar_to_list_string_proto'
31+
IMPORTS = ('@STAGE_HOLDER/feast.zip');
32+
33+
CREATE FUNCTION IF NOT EXISTS feast_PROJECT_NAME_snowflake_array_number_to_list_int32_proto(df ARRAY)
34+
RETURNS BINARY
35+
LANGUAGE PYTHON
36+
RUNTIME_VERSION = '3.8'
37+
PACKAGES = ('protobuf', 'pandas')
38+
HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_number_to_list_int32_proto'
39+
IMPORTS = ('@STAGE_HOLDER/feast.zip');
40+
41+
CREATE FUNCTION IF NOT EXISTS feast_PROJECT_NAME_snowflake_array_number_to_list_int64_proto(df ARRAY)
42+
RETURNS BINARY
43+
LANGUAGE PYTHON
44+
RUNTIME_VERSION = '3.8'
45+
PACKAGES = ('protobuf', 'pandas')
46+
HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_number_to_list_int64_proto'
47+
IMPORTS = ('@STAGE_HOLDER/feast.zip');
48+
49+
CREATE FUNCTION IF NOT EXISTS feast_PROJECT_NAME_snowflake_array_float_to_list_double_proto(df ARRAY)
50+
RETURNS BINARY
51+
LANGUAGE PYTHON
52+
RUNTIME_VERSION = '3.8'
53+
PACKAGES = ('protobuf', 'pandas')
54+
HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_float_to_list_double_proto'
55+
IMPORTS = ('@STAGE_HOLDER/feast.zip');
56+
57+
CREATE FUNCTION IF NOT EXISTS feast_PROJECT_NAME_snowflake_array_boolean_to_list_bool_proto(df ARRAY)
58+
RETURNS BINARY
59+
LANGUAGE PYTHON
60+
RUNTIME_VERSION = '3.8'
61+
PACKAGES = ('protobuf', 'pandas')
62+
HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_boolean_to_list_bool_proto'
63+
IMPORTS = ('@STAGE_HOLDER/feast.zip');
64+
65+
CREATE FUNCTION IF NOT EXISTS feast_PROJECT_NAME_snowflake_array_timestamp_to_list_unix_timestamp_proto(df ARRAY)
66+
RETURNS BINARY
67+
LANGUAGE PYTHON
68+
RUNTIME_VERSION = '3.8'
69+
PACKAGES = ('protobuf', 'pandas')
70+
HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_timestamp_to_list_unix_timestamp_proto'
71+
IMPORTS = ('@STAGE_HOLDER/feast.zip');
72+
1773
CREATE FUNCTION IF NOT EXISTS feast_PROJECT_NAME_snowflake_number_to_int32_proto(df NUMBER)
1874
RETURNS BINARY
1975
LANGUAGE PYTHON

sdk/python/feast/infra/utils/snowflake/snowpark/snowflake_udfs.py

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,175 @@ def feast_snowflake_varchar_to_string_proto(df):
5959
return df
6060

6161

62+
"""
63+
CREATE OR REPLACE FUNCTION feast_snowflake_array_bytes_to_list_bytes_proto(df ARRAY)
64+
RETURNS BINARY
65+
LANGUAGE PYTHON
66+
RUNTIME_VERSION = '3.8'
67+
PACKAGES = ('protobuf', 'pandas')
68+
HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_bytes_to_list_bytes_proto'
69+
IMPORTS = ('@feast_stage/feast.zip');
70+
"""
71+
# ValueType.BYTES_LIST = 11
72+
@vectorized(input=pandas.DataFrame)
73+
def feast_snowflake_array_bytes_to_list_bytes_proto(df):
74+
sys._xoptions["snowflake_partner_attribution"].append("feast")
75+
76+
df = list(
77+
map(
78+
ValueProto.SerializeToString,
79+
python_values_to_proto_values(df[0].to_numpy(), ValueType.BYTES_LIST),
80+
)
81+
)
82+
return df
83+
84+
85+
"""
86+
CREATE OR REPLACE FUNCTION feast_snowflake_array_varchar_to_list_string_proto(df ARRAY)
87+
RETURNS BINARY
88+
LANGUAGE PYTHON
89+
RUNTIME_VERSION = '3.8'
90+
PACKAGES = ('protobuf', 'pandas')
91+
HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_varchar_to_list_string_proto'
92+
IMPORTS = ('@feast_stage/feast.zip');
93+
"""
94+
95+
96+
@vectorized(input=pandas.DataFrame)
97+
def feast_snowflake_array_varchar_to_list_string_proto(df):
98+
sys._xoptions["snowflake_partner_attribution"].append("feast")
99+
100+
df = list(
101+
map(
102+
ValueProto.SerializeToString,
103+
python_values_to_proto_values(df[0].to_numpy(), ValueType.STRING_LIST),
104+
)
105+
)
106+
return df
107+
108+
109+
"""
110+
CREATE OR REPLACE FUNCTION feast_snowflake_array_number_to_list_int32_proto(df ARRAY)
111+
RETURNS BINARY
112+
LANGUAGE PYTHON
113+
RUNTIME_VERSION = '3.8'
114+
PACKAGES = ('protobuf', 'pandas')
115+
HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_number_to_list_int32_proto'
116+
IMPORTS = ('@feast_stage/feast.zip');
117+
"""
118+
119+
120+
@vectorized(input=pandas.DataFrame)
121+
def feast_snowflake_array_number_to_list_int32_proto(df):
122+
sys._xoptions["snowflake_partner_attribution"].append("feast")
123+
124+
df = list(
125+
map(
126+
ValueProto.SerializeToString,
127+
python_values_to_proto_values(df[0].to_numpy(), ValueType.INT32_LIST),
128+
)
129+
)
130+
return df
131+
132+
133+
"""
134+
CREATE OR REPLACE FUNCTION feast_snowflake_array_number_to_list_int64_proto(df ARRAY)
135+
RETURNS BINARY
136+
LANGUAGE PYTHON
137+
RUNTIME_VERSION = '3.8'
138+
PACKAGES = ('protobuf', 'pandas')
139+
HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_number_to_list_int64_proto'
140+
IMPORTS = ('@feast_stage/feast.zip');
141+
"""
142+
143+
144+
@vectorized(input=pandas.DataFrame)
145+
def feast_snowflake_array_number_to_list_int64_proto(df):
146+
sys._xoptions["snowflake_partner_attribution"].append("feast")
147+
148+
df = list(
149+
map(
150+
ValueProto.SerializeToString,
151+
python_values_to_proto_values(df[0].to_numpy(), ValueType.INT64_LIST),
152+
)
153+
)
154+
return df
155+
156+
157+
"""
158+
CREATE OR REPLACE FUNCTION feast_snowflake_array_float_to_list_double_proto(df ARRAY)
159+
RETURNS BINARY
160+
LANGUAGE PYTHON
161+
RUNTIME_VERSION = '3.8'
162+
PACKAGES = ('protobuf', 'pandas')
163+
HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_float_to_list_double_proto'
164+
IMPORTS = ('@feast_stage/feast.zip');
165+
"""
166+
167+
168+
@vectorized(input=pandas.DataFrame)
169+
def feast_snowflake_array_float_to_list_double_proto(df):
170+
sys._xoptions["snowflake_partner_attribution"].append("feast")
171+
172+
df = list(
173+
map(
174+
ValueProto.SerializeToString,
175+
python_values_to_proto_values(df[0].to_numpy(), ValueType.DOUBLE_LIST),
176+
)
177+
)
178+
return df
179+
180+
181+
"""
182+
CREATE OR REPLACE FUNCTION feast_snowflake_array_boolean_to_list_bool_proto(df ARRAY)
183+
RETURNS BINARY
184+
LANGUAGE PYTHON
185+
RUNTIME_VERSION = '3.8'
186+
PACKAGES = ('protobuf', 'pandas')
187+
HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_boolean_to_list_bool_proto'
188+
IMPORTS = ('@feast_stage/feast.zip');
189+
"""
190+
191+
192+
@vectorized(input=pandas.DataFrame)
193+
def feast_snowflake_array_boolean_to_list_bool_proto(df):
194+
sys._xoptions["snowflake_partner_attribution"].append("feast")
195+
196+
df = list(
197+
map(
198+
ValueProto.SerializeToString,
199+
python_values_to_proto_values(df[0].to_numpy(), ValueType.BOOL_LIST),
200+
)
201+
)
202+
return df
203+
204+
205+
"""
206+
CREATE OR REPLACE FUNCTION feast_snowflake_array_timestamp_to_list_unix_timestamp_proto(df ARRAY)
207+
RETURNS BINARY
208+
LANGUAGE PYTHON
209+
RUNTIME_VERSION = '3.8'
210+
PACKAGES = ('protobuf', 'pandas')
211+
HANDLER = 'feast.infra.utils.snowflake.snowpark.snowflake_udfs.feast_snowflake_array_timestamp_to_list_unix_timestamp_proto'
212+
IMPORTS = ('@feast_stage/feast.zip');
213+
"""
214+
215+
216+
@vectorized(input=pandas.DataFrame)
217+
def feast_snowflake_array_timestamp_to_list_unix_timestamp_proto(df):
218+
sys._xoptions["snowflake_partner_attribution"].append("feast")
219+
220+
df = list(
221+
map(
222+
ValueProto.SerializeToString,
223+
python_values_to_proto_values(
224+
df[0].to_numpy(), ValueType.UNIX_TIMESTAMP_LIST
225+
),
226+
)
227+
)
228+
return df
229+
230+
62231
"""
63232
CREATE OR REPLACE FUNCTION feast_snowflake_number_to_int32_proto(df NUMBER)
64233
RETURNS BINARY

sdk/python/feast/type_map.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,7 @@ def snowflake_type_to_feast_value_type(snowflake_type: str) -> ValueType:
648648
"TIMESTAMP_TZ": ValueType.UNIX_TIMESTAMP,
649649
"TIMESTAMP_LTZ": ValueType.UNIX_TIMESTAMP,
650650
"TIMESTAMP_NTZ": ValueType.UNIX_TIMESTAMP,
651+
"ARRAY": ValueType.STRING, # The element type of a Snowflake ARRAY cannot be inferred, so fall back to STRING
651652
}
652653
return type_map[snowflake_type]
653654

@@ -662,6 +663,14 @@ def _convert_value_name_to_snowflake_udf(value_name: str, project_name: str) ->
662663
"FLOAT": f"feast_{project_name}_snowflake_float_to_double_proto",
663664
"BOOL": f"feast_{project_name}_snowflake_boolean_to_bool_proto",
664665
"UNIX_TIMESTAMP": f"feast_{project_name}_snowflake_timestamp_to_unix_timestamp_proto",
666+
"BYTES_LIST": f"feast_{project_name}_snowflake_array_bytes_to_list_bytes_proto",
667+
"STRING_LIST": f"feast_{project_name}_snowflake_array_varchar_to_list_string_proto",
668+
"INT32_LIST": f"feast_{project_name}_snowflake_array_number_to_list_int32_proto",
669+
"INT64_LIST": f"feast_{project_name}_snowflake_array_number_to_list_int64_proto",
670+
"DOUBLE_LIST": f"feast_{project_name}_snowflake_array_float_to_list_double_proto",
671+
"FLOAT_LIST": f"feast_{project_name}_snowflake_array_float_to_list_double_proto",
672+
"BOOL_LIST": f"feast_{project_name}_snowflake_array_boolean_to_list_bool_proto",
673+
"UNIX_TIMESTAMP_LIST": f"feast_{project_name}_snowflake_array_timestamp_to_list_unix_timestamp_proto",
665674
}
666675
return name_map[value_name].upper()
667676

0 commit comments

Comments
 (0)