Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,6 @@ def pull_latest_from_table_or_query(
start_date: datetime,
end_date: datetime,
) -> RetrievalJob:

con = get_ibis_connection(config)

return pull_latest_from_table_or_query_ibis(
Expand Down
48 changes: 45 additions & 3 deletions sdk/python/feast/infra/offline_stores/offline_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,17 +154,59 @@ def to_arrow(
"""
features_table = self._to_arrow_internal(timeout=timeout)
if self.on_demand_feature_views:
# Build a mapping of ODFV name to requested feature names
# This ensures we only return the features that were explicitly requested
odfv_feature_refs = {}
try:
metadata = self.metadata
except NotImplementedError:
metadata = None

if metadata and metadata.features:
for feature_ref in metadata.features:
if ":" in feature_ref:
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is going to be brittle once my feature view versioning PR lands, because feature references will then support the `@vN` syntax.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@franciscojavierarceo What will it look like once implemented, so that I can make this future-proof? Or would you prefer to handle it in your PR?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh, I see! It may look like `driver_stats@v2:trips_today`, but I don't see how this syntax would break identifying view_name and feature_name, because the separator would still be `:`.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

view_name, feature_name = feature_ref.split(":", 1)
# Check if this view_name matches any of the ODFVs
for odfv in self.on_demand_feature_views:
if (
odfv.name == view_name
or odfv.projection.name_to_use() == view_name
):
if view_name not in odfv_feature_refs:
odfv_feature_refs[view_name] = set()
# Store the feature name in the format that will appear in transformed_arrow
expected_col_name = (
f"{odfv.projection.name_to_use()}__{feature_name}"
if self.full_feature_names
else feature_name
)
odfv_feature_refs[view_name].add(expected_col_name)

for odfv in self.on_demand_feature_views:
transformed_arrow = odfv.transform_arrow(
features_table, self.full_feature_names
)

# Determine which columns to include from this ODFV
# If we have metadata with requested features, filter to only those
# Otherwise, include all columns (backward compatibility)
requested_features_for_odfv = (
odfv_feature_refs.get(odfv.name)
if odfv.name in odfv_feature_refs
else odfv_feature_refs.get(odfv.projection.name_to_use())
)

for col in transformed_arrow.column_names:
if col.startswith("__index"):
continue
features_table = features_table.append_column(
col, transformed_arrow[col]
)
# Only append the column if it was requested, or if we don't have feature metadata
if (
requested_features_for_odfv is None
or col in requested_features_for_odfv
):
features_table = features_table.append_column(
col, transformed_arrow[col]
)

if validation_reference:
if not flags_helper.is_test():
Expand Down
1 change: 0 additions & 1 deletion sdk/python/feast/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ def compute_non_entity_date_range(
end_date: Optional[datetime] = None,
default_window_days: int = 30,
) -> Tuple[datetime, datetime]:

if end_date is None:
end_date = datetime.now(tz=timezone.utc)
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -840,3 +840,120 @@ def test_historical_features_non_entity_retrieval(environment):
assert 300 in actual_trips, (
"Latest trip value 300 for driver 1002 should be present"
)


@pytest.mark.integration
@pytest.mark.universal_offline_stores
@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: f"full:{v}")
def test_odfv_projection(environment, universal_data_sources, full_feature_names):
    """
    Check that only the explicitly requested ODFV output features are returned.

    Regression test for issue #6099. The ``conv_rate_plus_100`` on-demand
    feature view produces three outputs; this test requests one, then two, of
    them through ``get_historical_features`` and asserts that the remaining
    outputs are absent from the resulting dataframe. It finishes by issuing
    the single-feature request through ``get_online_features`` and applying
    the same presence/absence assertions to that response.

    The test is parametrized over ``full_feature_names`` so that both the
    prefixed (``<view>__<feature>``) and bare column names are exercised.
    """
    fs = environment.feature_store
    _entities, datasets, data_sources = universal_data_sources

    views = construct_universal_feature_views(data_sources)

    # The ODFV needs request data, so attach a ``val_to_add`` column
    # (0, 1, 2, ...) to a private copy of the entity dataframe.
    request_df = datasets.entity_df.copy(deep=True)
    request_df["val_to_add"] = list(range(len(request_df)))

    fs.apply([driver(), *views.values()])

    # Output columns of the conv_rate_plus_100 ODFV, spelled the way they
    # appear in the result for the current ``full_feature_names`` setting.
    if full_feature_names:
        requested_col = "conv_rate_plus_100__conv_rate_plus_100"
        val_to_add_col = "conv_rate_plus_100__conv_rate_plus_val_to_add"
        rounded_col = "conv_rate_plus_100__conv_rate_plus_100_rounded"
    else:
        requested_col = "conv_rate_plus_100"
        val_to_add_col = "conv_rate_plus_val_to_add"
        rounded_col = "conv_rate_plus_100_rounded"

    # --- Scenario 1: ask for a single ODFV output -------------------------
    single_df = fs.get_historical_features(
        entity_df=request_df,
        features=["conv_rate_plus_100:conv_rate_plus_100"],
        full_feature_names=full_feature_names,
    ).to_df()

    assert requested_col in single_df.columns, (
        f"Requested feature '{requested_col}' should be in the result"
    )
    # The two outputs that were not requested must be absent.
    for absent_col in (val_to_add_col, rounded_col):
        assert absent_col not in single_df.columns, (
            f"Unrequested ODFV feature '{absent_col}' should NOT be in the result. "
            f"This indicates the bug from issue #6099 still exists."
        )

    # --- Scenario 2: ask for two of the three ODFV outputs ----------------
    pair_df = fs.get_historical_features(
        entity_df=request_df,
        features=[
            "conv_rate_plus_100:conv_rate_plus_100",
            "conv_rate_plus_100:conv_rate_plus_val_to_add",
        ],
        full_feature_names=full_feature_names,
    ).to_df()

    assert requested_col in pair_df.columns
    assert val_to_add_col in pair_df.columns
    # conv_rate_plus_100_rounded was deliberately left out of the request.
    assert rounded_col not in pair_df.columns, (
        f"Unrequested ODFV feature '{rounded_col}' should NOT be in the result"
    )

    # --- Scenario 3: the online path must agree with the offline path -----
    online_result = fs.get_online_features(
        features=["conv_rate_plus_100:conv_rate_plus_100"],
        entity_rows=[{"driver_id": 1001, "val_to_add": 1}],
        full_feature_names=full_feature_names,
    ).to_dict()

    assert requested_col in online_result, (
        f"Online retrieval should return requested feature '{requested_col}'"
    )
    for absent_col in (val_to_add_col, rounded_col):
        assert absent_col not in online_result, (
            f"Online retrieval should NOT return unrequested feature '{absent_col}'"
        )
Loading