-
Notifications
You must be signed in to change notification settings - Fork 1.2k
feat: Add materialization to Transformation On Writes #5436
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d26fd67
7ea3704
a892eb4
8c43efa
c096190
e43d491
f52eb53
33acbd1
b6314cd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -656,7 +656,7 @@ def _make_inferences( | |
| def _get_feature_views_to_materialize( | ||
| self, | ||
| feature_views: Optional[List[str]], | ||
| ) -> List[FeatureView]: | ||
| ) -> List[Union[FeatureView, OnDemandFeatureView]]: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Interesting that we are returning only FeatureView/OnDemandFeatureView, but in reality when The return list needs to change here. |
||
| """ | ||
| Returns the list of feature views that should be materialized. | ||
|
|
||
|
|
@@ -669,34 +669,53 @@ def _get_feature_views_to_materialize( | |
| FeatureViewNotFoundException: One of the specified feature views could not be found. | ||
| ValueError: One of the specified feature views is not configured for materialization. | ||
| """ | ||
| feature_views_to_materialize: List[FeatureView] = [] | ||
| feature_views_to_materialize: List[Union[FeatureView, OnDemandFeatureView]] = [] | ||
|
|
||
| if feature_views is None: | ||
| feature_views_to_materialize = utils._list_feature_views( | ||
| regular_feature_views = utils._list_feature_views( | ||
| self._registry, self.project, hide_dummy_entity=False | ||
| ) | ||
| feature_views_to_materialize = [ | ||
| fv for fv in feature_views_to_materialize if fv.online | ||
| ] | ||
| feature_views_to_materialize.extend( | ||
| [fv for fv in regular_feature_views if fv.online] | ||
| ) | ||
| stream_feature_views_to_materialize = self._list_stream_feature_views( | ||
| hide_dummy_entity=False | ||
| ) | ||
| feature_views_to_materialize += [ | ||
| sfv for sfv in stream_feature_views_to_materialize if sfv.online | ||
| ] | ||
| feature_views_to_materialize.extend( | ||
| [sfv for sfv in stream_feature_views_to_materialize if sfv.online] | ||
| ) | ||
| on_demand_feature_views_to_materialize = self.list_on_demand_feature_views() | ||
| feature_views_to_materialize.extend( | ||
| [ | ||
| odfv | ||
| for odfv in on_demand_feature_views_to_materialize | ||
| if odfv.write_to_online_store | ||
| ] | ||
| ) | ||
| else: | ||
| for name in feature_views: | ||
| feature_view: Union[FeatureView, OnDemandFeatureView] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we accept one more parameter to Its really confusing why we are returning different FV types list in |
||
| try: | ||
| feature_view = self._get_feature_view(name, hide_dummy_entity=False) | ||
| except FeatureViewNotFoundException: | ||
| feature_view = self._get_stream_feature_view( | ||
| name, hide_dummy_entity=False | ||
| ) | ||
| try: | ||
| feature_view = self._get_stream_feature_view( | ||
| name, hide_dummy_entity=False | ||
| ) | ||
| except FeatureViewNotFoundException: | ||
| feature_view = self.get_on_demand_feature_view(name) | ||
|
|
||
| if not feature_view.online: | ||
| if hasattr(feature_view, "online") and not feature_view.online: | ||
| raise ValueError( | ||
| f"FeatureView {feature_view.name} is not configured to be served online." | ||
| ) | ||
| elif ( | ||
| hasattr(feature_view, "write_to_online_store") | ||
| and not feature_view.write_to_online_store | ||
| ): | ||
| raise ValueError( | ||
| f"OnDemandFeatureView {feature_view.name} is not configured for write_to_online_store." | ||
| ) | ||
| feature_views_to_materialize.append(feature_view) | ||
|
|
||
| return feature_views_to_materialize | ||
|
|
@@ -866,7 +885,8 @@ def apply( | |
| views_to_update = [ | ||
| ob | ||
| for ob in objects | ||
| if ( | ||
| if | ||
| ( | ||
| # BFVs are not handled separately from FVs right now. | ||
| (isinstance(ob, FeatureView) or isinstance(ob, BatchFeatureView)) | ||
| and not isinstance(ob, StreamFeatureView) | ||
|
|
@@ -1312,6 +1332,11 @@ def materialize_incremental( | |
| ) | ||
| # TODO paging large loads | ||
| for feature_view in feature_views_to_materialize: | ||
| from feast.on_demand_feature_view import OnDemandFeatureView | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. to avoid import twice, can put this at top of the file? |
||
|
|
||
| if isinstance(feature_view, OnDemandFeatureView): | ||
| continue | ||
|
Comment on lines
+1337
to
+1338
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For my knowledge: |
||
|
|
||
| start_date = feature_view.most_recent_end_time | ||
| if start_date is None: | ||
| if feature_view.ttl is None: | ||
|
|
@@ -1352,12 +1377,13 @@ def tqdm_builder(length): | |
| tqdm_builder=tqdm_builder, | ||
| ) | ||
|
|
||
| self._registry.apply_materialization( | ||
| feature_view, | ||
| self.project, | ||
| start_date, | ||
| end_date, | ||
| ) | ||
| if not isinstance(feature_view, OnDemandFeatureView): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since We are continuing the loop at line 1337&1338, we dont need this condition here. |
||
| self._registry.apply_materialization( | ||
| feature_view, | ||
| self.project, | ||
| start_date, | ||
| end_date, | ||
| ) | ||
|
|
||
| def materialize( | ||
| self, | ||
|
|
@@ -1407,6 +1433,8 @@ def materialize( | |
| ) | ||
| # TODO paging large loads | ||
| for feature_view in feature_views_to_materialize: | ||
| from feast.on_demand_feature_view import OnDemandFeatureView | ||
|
|
||
| provider = self._get_provider() | ||
| print(f"{Style.BRIGHT + Fore.GREEN}{feature_view.name}{Style.RESET_ALL}:") | ||
|
|
||
|
|
@@ -1426,12 +1454,13 @@ def tqdm_builder(length): | |
| tqdm_builder=tqdm_builder, | ||
| ) | ||
|
|
||
| self._registry.apply_materialization( | ||
| feature_view, | ||
| self.project, | ||
| start_date, | ||
| end_date, | ||
| ) | ||
| if not isinstance(feature_view, OnDemandFeatureView): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Rather than waiting until this line for ODFV based skip, kind of bringing uniqueness ! |
||
| self._registry.apply_materialization( | ||
| feature_view, | ||
| self.project, | ||
| start_date, | ||
| end_date, | ||
| ) | ||
|
|
||
| def _fvs_for_push_source_or_raise( | ||
| self, push_source_name: str, allow_cache: bool | ||
|
|
@@ -2033,9 +2062,9 @@ def retrieve_online_documents_v2( | |
| distance_metric: The distance metric to use for retrieval. | ||
| query_string: The query string to retrieve the closest document features using keyword search (bm25). | ||
| """ | ||
| assert query is not None or query_string is not None, ( | ||
| "Either query or query_string must be provided." | ||
| ) | ||
| assert ( | ||
| query is not None or query_string is not None | ||
| ), "Either query or query_string must be provided." | ||
|
|
||
| ( | ||
| available_feature_views, | ||
|
|
@@ -2348,9 +2377,9 @@ def write_logged_features( | |
| if not isinstance(source, FeatureService): | ||
| raise ValueError("Only feature service is currently supported as a source") | ||
|
|
||
| assert source.logging_config is not None, ( | ||
| "Feature service must be configured with logging config in order to use this functionality" | ||
| ) | ||
| assert ( | ||
| source.logging_config is not None | ||
| ), "Feature service must be configured with logging config in order to use this functionality" | ||
|
|
||
| assert isinstance(logs, (pa.Table, Path)) | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,7 +16,7 @@ | |
| import pyarrow as pa | ||
| from tqdm import tqdm | ||
|
|
||
| from feast import OnDemandFeatureView, importer | ||
| from feast import importer | ||
| from feast.base_feature_view import BaseFeatureView | ||
| from feast.batch_feature_view import BatchFeatureView | ||
| from feast.data_source import DataSource | ||
|
|
@@ -38,6 +38,7 @@ | |
| from feast.infra.provider import Provider | ||
| from feast.infra.registry.base_registry import BaseRegistry | ||
| from feast.infra.supported_async_methods import ProviderAsyncMethods | ||
| from feast.on_demand_feature_view import OnDemandFeatureView | ||
| from feast.online_response import OnlineResponse | ||
| from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto | ||
| from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto | ||
|
|
@@ -420,13 +421,22 @@ def ingest_df_to_offline_store(self, feature_view: FeatureView, table: pa.Table) | |
| def materialize_single_feature_view( | ||
| self, | ||
| config: RepoConfig, | ||
| feature_view: FeatureView, | ||
| feature_view: Union[FeatureView, OnDemandFeatureView], | ||
| start_date: datetime, | ||
| end_date: datetime, | ||
| registry: BaseRegistry, | ||
| project: str, | ||
| tqdm_builder: Callable[[int], tqdm], | ||
| ) -> None: | ||
| from feast.on_demand_feature_view import OnDemandFeatureView | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We are reapeating the import. |
||
|
|
||
| if isinstance(feature_view, OnDemandFeatureView): | ||
| if not feature_view.write_to_online_store: | ||
| raise ValueError( | ||
| f"OnDemandFeatureView {feature_view.name} does not have write_to_online_store enabled" | ||
| ) | ||
| return | ||
|
|
||
| assert ( | ||
| isinstance(feature_view, BatchFeatureView) | ||
| or isinstance(feature_view, StreamFeatureView) | ||
|
|
@@ -496,9 +506,9 @@ def write_feature_service_logs( | |
| config: RepoConfig, | ||
| registry: BaseRegistry, | ||
| ): | ||
| assert feature_service.logging_config is not None, ( | ||
| "Logging should be configured for the feature service before calling this function" | ||
| ) | ||
| assert ( | ||
| feature_service.logging_config is not None | ||
| ), "Logging should be configured for the feature service before calling this function" | ||
|
|
||
| self.offline_store.write_logged_features( | ||
| config=config, | ||
|
|
@@ -516,9 +526,9 @@ def retrieve_feature_service_logs( | |
| config: RepoConfig, | ||
| registry: BaseRegistry, | ||
| ) -> RetrievalJob: | ||
| assert feature_service.logging_config is not None, ( | ||
| "Logging should be configured for the feature service before calling this function" | ||
| ) | ||
| assert ( | ||
| feature_service.logging_config is not None | ||
| ), "Logging should be configured for the feature service before calling this function" | ||
|
|
||
| logging_source = FeatureServiceLoggingSource(feature_service, config.project) | ||
| schema = logging_source.get_schema(registry) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.