-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Expand file tree
/
Copy pathfeature_view_projection.py
More file actions
161 lines (143 loc) · 6.75 KB
/
feature_view_projection.py
File metadata and controls
161 lines (143 loc) · 6.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
from typing import TYPE_CHECKING, Dict, List, Optional

from attr import Factory, dataclass

from feast.data_source import DataSource
from feast.field import Field
from feast.protos.feast.core.FeatureViewProjection_pb2 import (
    FeatureViewProjection as FeatureViewProjectionProto,
)

if TYPE_CHECKING:
    from feast.base_feature_view import BaseFeatureView
    from feast.feature_view import FeatureView
@dataclass
class FeatureViewProjection:
    """
    A feature view projection represents a selection of one or more features from a
    single feature view.

    Attributes:
        name: The unique name of the feature view from which this projection is created.
        name_alias: An optional alias for the name.
        features: The list of features represented by the feature view projection.
        desired_features: The list of features that this feature view projection intends to select.
            If empty, the projection intends to select all features. This attribute is only used
            for feature service inference. It should only be set if the underlying feature view
            is not ready to be projected, i.e. still needs to go through feature inference.
        join_key_map: A map to modify join key columns during retrieval of this feature
            view projection.
        timestamp_field: The timestamp field of the feature view projection.
        date_partition_column: The date partition column of the feature view projection.
        created_timestamp_column: The created timestamp column of the feature view projection.
        batch_source: The batch source of data where this group of features
            is stored. This is optional ONLY if a push source is specified as the
            stream_source, since push sources contain their own batch sources.
        version_tag: An optional integer version. When set, ``name_to_use`` appends
            ``@v<version_tag>`` to the name and the value is written to the proto.
    """

    name: str
    name_alias: Optional[str]
    desired_features: List[str]
    features: List[Field]
    # Factory gives every instance its own dict. A literal ``{}`` default is a
    # single object shared by all projections created without an explicit map.
    join_key_map: Dict[str, str] = Factory(dict)
    timestamp_field: Optional[str] = None
    date_partition_column: Optional[str] = None
    created_timestamp_column: Optional[str] = None
    batch_source: Optional[DataSource] = None
    version_tag: Optional[int] = None

    def name_to_use(self) -> str:
        """Return the alias if one is set, else the name, with any version suffix.

        Returns:
            ``name_alias`` (or ``name`` when the alias is empty/None), followed by
            ``@v<version_tag>`` when ``version_tag`` is not None.
        """
        base = self.name_alias or self.name
        if self.version_tag is not None:
            return f"{base}@v{self.version_tag}"
        return base

    def to_proto(self) -> FeatureViewProjectionProto:
        """Convert this projection to its protobuf representation.

        Optional string attributes that are None are written as empty strings.
        ``version_tag`` is only assigned on the proto when it is not None.
        """
        batch_source = None
        if getattr(self, "batch_source", None):
            if isinstance(self.batch_source, DataSource):
                batch_source = self.batch_source.to_proto()
            else:
                # Not a DataSource: handed to the proto constructor unchanged
                # (presumably already a proto message -- verify against callers).
                batch_source = self.batch_source

        feature_reference_proto = FeatureViewProjectionProto(
            feature_view_name=self.name,
            feature_view_name_alias=self.name_alias or "",
            join_key_map=self.join_key_map,
            timestamp_field=self.timestamp_field or "",
            date_partition_column=self.date_partition_column or "",
            created_timestamp_column=self.created_timestamp_column or "",
            batch_source=batch_source,
        )
        for feature in self.features:
            feature_reference_proto.feature_columns.append(feature.to_proto())
        if self.version_tag is not None:
            feature_reference_proto.version_tag = self.version_tag

        return feature_reference_proto

    @staticmethod
    def from_proto(proto: FeatureViewProjectionProto) -> "FeatureViewProjection":
        """Build a projection from its protobuf representation.

        Empty proto strings become None. ``desired_features`` is always reset to
        an empty list, and ``version_tag`` stays None unless the proto has it set.
        """
        # An unset (or entirely empty) message renders as an empty string, so
        # the text form doubles as the presence check for the batch source.
        batch_source = (
            DataSource.from_proto(proto.batch_source)
            if str(getattr(proto, "batch_source"))
            else None
        )
        return FeatureViewProjection(
            name=proto.feature_view_name,
            name_alias=proto.feature_view_name_alias or None,
            features=[Field.from_proto(column) for column in proto.feature_columns],
            join_key_map=dict(proto.join_key_map),
            desired_features=[],
            timestamp_field=proto.timestamp_field or None,
            date_partition_column=proto.date_partition_column or None,
            created_timestamp_column=proto.created_timestamp_column or None,
            batch_source=batch_source,
            version_tag=proto.version_tag if proto.HasField("version_tag") else None,
        )

    @staticmethod
    def _from_view(view: "BaseFeatureView") -> "FeatureViewProjection":
        """Build a projection that selects every feature of ``view``.

        Column metadata is copied from the view's ``batch_source`` when the view
        has a truthy one; otherwise only the name and features are populated.
        """
        batch_source = getattr(view, "batch_source", None)
        if not batch_source:
            return FeatureViewProjection(
                name=view.name,
                name_alias=None,
                features=view.features,
                desired_features=[],
            )
        return FeatureViewProjection(
            name=view.name,
            name_alias=None,
            features=view.features,
            desired_features=[],
            # Event timestamp column of the source, distinct from the created
            # timestamp column copied on the next line.
            timestamp_field=batch_source.timestamp_field or None,
            created_timestamp_column=batch_source.created_timestamp_column or None,
            date_partition_column=batch_source.date_partition_column or None,
            batch_source=batch_source,
        )

    @staticmethod
    def from_feature_view_definition(
        feature_view: "FeatureView",
    ) -> "FeatureViewProjection":
        """Create a projection covering all features of ``feature_view``."""
        # TODO need to implement this for StreamFeatureViews
        return FeatureViewProjection._from_view(feature_view)

    @staticmethod
    def from_definition(
        base_feature_view: "BaseFeatureView",
    ) -> "FeatureViewProjection":
        """Create a projection covering all features of ``base_feature_view``."""
        return FeatureViewProjection._from_view(base_feature_view)

    def get_feature(self, feature_name: str) -> Field:
        """Return the first field in ``features`` whose name is ``feature_name``.

        Raises:
            KeyError: If no field in this projection has that name.
        """
        for field in self.features:
            if field.name == feature_name:
                return field
        raise KeyError(
            f"Feature {feature_name} not found in projection {self.name_to_use()}"
        )