Skip to content

Commit b3dcde7

Browse files
Ambient Code Bot authored and ntkathole committed
Fix ODFV output projection in offline retrieval (#6099)
Changes:
- Modified RetrievalJob.to_arrow() to filter ODFV outputs based on requested features from metadata, matching online retrieval behavior
- Added test_odfv_projection to verify the fix and prevent regression

Signed-off-by: Jitendra Yejare <11752425+jyejare@users.noreply.github.com>
1 parent a623674 commit b3dcde7

File tree

4 files changed

+158
-5
lines changed

4 files changed

+158
-5
lines changed

sdk/python/feast/feature_store.py

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1447,6 +1447,19 @@ def get_historical_features(
14471447
feature_views = list(view for view, _ in fvs)
14481448
on_demand_feature_views = list(view for view, _ in odfvs)
14491449

1450+
# ODFV source FV dependencies (e.g. driver_stats:conv_rate) are resolved
1451+
# by _group_feature_refs and included in `fvs`, but not in _feature_refs.
1452+
# Offline stores use feature_refs to map which features to fetch from each
1453+
# FV, so we must include these implicit dependency refs.
1454+
_feature_refs_for_provider = list(_feature_refs)
1455+
existing_refs = set(_feature_refs)
1456+
for view, feats in fvs:
1457+
for feat in feats:
1458+
ref = f"{view.projection.name_to_use()}:{feat}"
1459+
if ref not in existing_refs:
1460+
_feature_refs_for_provider.append(ref)
1461+
existing_refs.add(ref)
1462+
14501463
# Check that the right request data is present in the entity_df
14511464
if type(entity_df) == pd.DataFrame:
14521465
if self.config.coerce_tz_aware:
@@ -1473,7 +1486,7 @@ def get_historical_features(
14731486
job = provider.get_historical_features(
14741487
self.config,
14751488
feature_views,
1476-
_feature_refs,
1489+
_feature_refs_for_provider,
14771490
entity_df,
14781491
self.registry,
14791492
self.project,

sdk/python/feast/infra/offline_stores/offline_store.py

Lines changed: 45 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -154,17 +154,59 @@ def to_arrow(
154154
"""
155155
features_table = self._to_arrow_internal(timeout=timeout)
156156
if self.on_demand_feature_views:
157+
# Build a mapping of ODFV name to requested feature names
158+
# This ensures we only return the features that were explicitly requested
159+
odfv_feature_refs: Dict[str, set[str]] = {}
160+
try:
161+
metadata = self.metadata
162+
except NotImplementedError:
163+
metadata = None
164+
165+
if metadata and metadata.features:
166+
for feature_ref in metadata.features:
167+
if ":" in feature_ref:
168+
view_name, feature_name = feature_ref.split(":", 1)
169+
# Check if this view_name matches any of the ODFVs
170+
for odfv in self.on_demand_feature_views:
171+
if (
172+
odfv.name == view_name
173+
or odfv.projection.name_to_use() == view_name
174+
):
175+
if view_name not in odfv_feature_refs:
176+
odfv_feature_refs[view_name] = set()
177+
# Store the feature name in the format that will appear in transformed_arrow
178+
expected_col_name = (
179+
f"{odfv.projection.name_to_use()}__{feature_name}"
180+
if self.full_feature_names
181+
else feature_name
182+
)
183+
odfv_feature_refs[view_name].add(expected_col_name)
184+
157185
for odfv in self.on_demand_feature_views:
158186
transformed_arrow = odfv.transform_arrow(
159187
features_table, self.full_feature_names
160188
)
161189

190+
# Determine which columns to include from this ODFV
191+
# If we have metadata with requested features, filter to only those
192+
# Otherwise, include all columns (backward compatibility)
193+
requested_features_for_odfv = (
194+
odfv_feature_refs.get(odfv.name)
195+
if odfv.name in odfv_feature_refs
196+
else odfv_feature_refs.get(odfv.projection.name_to_use())
197+
)
198+
162199
for col in transformed_arrow.column_names:
163200
if col.startswith("__index"):
164201
continue
165-
features_table = features_table.append_column(
166-
col, transformed_arrow[col]
167-
)
202+
# Only append the column if it was requested, or if we don't have feature metadata
203+
if (
204+
requested_features_for_odfv is None
205+
or col in requested_features_for_odfv
206+
):
207+
features_table = features_table.append_column(
208+
col, transformed_arrow[col]
209+
)
168210

169211
if validation_reference:
170212
if not flags_helper.is_test():

sdk/python/feast/utils.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -127,7 +127,6 @@ def compute_non_entity_date_range(
127127
end_date: Optional[datetime] = None,
128128
default_window_days: int = 30,
129129
) -> Tuple[datetime, datetime]:
130-
131130
if end_date is None:
132131
end_date = datetime.now(tz=timezone.utc)
133132
else:

sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py

Lines changed: 99 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -840,3 +840,102 @@ def test_historical_features_non_entity_retrieval(environment):
840840
assert 300 in actual_trips, (
841841
"Latest trip value 300 for driver 1002 should be present"
842842
)
843+
844+
845+
@pytest.mark.integration
846+
@pytest.mark.universal_offline_stores
847+
@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: f"full:{v}")
848+
def test_odfv_projection(environment, universal_data_sources, full_feature_names):
849+
"""
850+
Test that requesting a subset of ODFV features only returns those features.
851+
852+
Regression test for issue #6099: OnDemandFeatureViews should honor output
853+
projection in offline retrieval, matching the behavior of online retrieval.
854+
855+
Before the fix, offline retrieval would return ALL ODFV output features even
856+
when only a subset was requested, while online retrieval correctly returned
857+
only the requested features.
858+
"""
859+
store = environment.feature_store
860+
861+
(entities, datasets, data_sources) = universal_data_sources
862+
863+
feature_views = construct_universal_feature_views(data_sources)
864+
865+
# Add request data needed for ODFV
866+
entity_df_with_request_data = datasets.entity_df.copy(deep=True)
867+
entity_df_with_request_data["val_to_add"] = [
868+
i for i in range(len(entity_df_with_request_data))
869+
]
870+
871+
store.apply([driver(), *feature_views.values()])
872+
873+
# The conv_rate_plus_100 ODFV has 3 output features:
874+
# - conv_rate_plus_100
875+
# - conv_rate_plus_val_to_add
876+
# - conv_rate_plus_100_rounded
877+
878+
# Test 1: Request only ONE ODFV feature
879+
job = store.get_historical_features(
880+
entity_df=entity_df_with_request_data,
881+
features=[
882+
"conv_rate_plus_100:conv_rate_plus_100", # Request only this one
883+
],
884+
full_feature_names=full_feature_names,
885+
)
886+
887+
actual_df = job.to_df()
888+
889+
# Determine expected column names based on full_feature_names setting
890+
expected_feature = (
891+
"conv_rate_plus_100__conv_rate_plus_100"
892+
if full_feature_names
893+
else "conv_rate_plus_100"
894+
)
895+
unrequested_feature_1 = (
896+
"conv_rate_plus_100__conv_rate_plus_val_to_add"
897+
if full_feature_names
898+
else "conv_rate_plus_val_to_add"
899+
)
900+
unrequested_feature_2 = (
901+
"conv_rate_plus_100__conv_rate_plus_100_rounded"
902+
if full_feature_names
903+
else "conv_rate_plus_100_rounded"
904+
)
905+
906+
# Verify the requested feature is present
907+
assert expected_feature in actual_df.columns, (
908+
f"Requested feature '{expected_feature}' should be in the result"
909+
)
910+
911+
# Verify unrequested ODFV features are NOT present (this is the key fix)
912+
assert unrequested_feature_1 not in actual_df.columns, (
913+
f"Unrequested ODFV feature '{unrequested_feature_1}' should NOT be in the result. "
914+
f"This indicates the bug from issue #6099 still exists."
915+
)
916+
assert unrequested_feature_2 not in actual_df.columns, (
917+
f"Unrequested ODFV feature '{unrequested_feature_2}' should NOT be in the result. "
918+
f"This indicates the bug from issue #6099 still exists."
919+
)
920+
921+
# Test 2: Request TWO out of THREE ODFV features
922+
job2 = store.get_historical_features(
923+
entity_df=entity_df_with_request_data,
924+
features=[
925+
"conv_rate_plus_100:conv_rate_plus_100",
926+
"conv_rate_plus_100:conv_rate_plus_val_to_add",
927+
# Deliberately NOT requesting conv_rate_plus_100_rounded
928+
],
929+
full_feature_names=full_feature_names,
930+
)
931+
932+
actual_df2 = job2.to_df()
933+
934+
# Verify the two requested features are present
935+
assert expected_feature in actual_df2.columns
936+
assert unrequested_feature_1 in actual_df2.columns
937+
938+
# Verify the unrequested feature is NOT present
939+
assert unrequested_feature_2 not in actual_df2.columns, (
940+
f"Unrequested ODFV feature '{unrequested_feature_2}' should NOT be in the result"
941+
)

0 commit comments

Comments
 (0)