Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion docs/getting-started/faq.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,16 @@ Yes, this is possible. For example, you can use BigQuery as an offline store and

### How do I run `get_historical_features` without providing an entity dataframe?

Feast does not provide a way to do this right now. This is an area we're actively interested in contributions for. See [GitHub issue](https://github.com/feast-dev/feast/issues/1611)
Feast supports fetching historical features without passing an entity dataframe with the request.
- As of today, only the `postgres` offline store supports retrieval without an entity dataframe. The remaining offline stores will be updated gradually to support it, prioritized by user base and user requests.
- The retrieval is based on the `start_date` and `end_date` parameters of the function. The following combinations are supported:
  - Both `start_date` and `end_date` are given: returns data within the given start-to-end time range.
  - Only `start_date` is given: returns data from `start_date` up to the current time.
  - Only `end_date` is given: returns data from `end_date` minus the feature view's TTL up to `end_date`.
  - Neither parameter is given: returns data from the current time minus the feature view's TTL up to the current time.
- When features are requested from multiple feature views, the entity IDs must be present in each of those feature views so that the results can be combined with a `JOIN`.

This is an area we're actively interested in contributions for. See [GitHub issue](https://github.com/feast-dev/feast/issues/1611)

### Does Feast provide security or access control?

Expand Down
57 changes: 45 additions & 12 deletions sdk/python/feast/cli/features.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
from datetime import datetime
from typing import List

import click
Expand Down Expand Up @@ -140,37 +141,69 @@ def get_online_features(ctx: click.Context, entities: List[str], features: List[
"--dataframe",
"-d",
type=str,
required=True,
help='JSON string containing entities and timestamps. Example: \'[{"event_timestamp": "2025-03-29T12:00:00", "driver_id": 1001}]\'',
)
@click.option(
"--features",
"-f",
multiple=True,
required=True,
help="Features to retrieve. feature-view:feature-name ex: driver_hourly_stats:conv_rate",
)
@click.option(
"--start-date",
"-s",
type=str,
help="Start date for historical feature retrieval. Format: YYYY-MM-DD HH:MM:SS",
)
@click.option(
"--end-date",
"-e",
type=str,
help="End date for historical feature retrieval. Format: YYYY-MM-DD HH:MM:SS",
)
@click.pass_context
def get_historical_features(ctx: click.Context, dataframe: str, features: List[str]):
def get_historical_features(
ctx: click.Context,
dataframe: str,
features: List[str],
start_date: str,
end_date: str,
):
"""
Fetch historical feature values for a given entity ID
"""
store = create_feature_store(ctx)
try:
entity_list = json.loads(dataframe)
if not isinstance(entity_list, list):
raise ValueError("Entities must be a list of dictionaries.")

entity_df = pd.DataFrame(entity_list)
entity_df["event_timestamp"] = pd.to_datetime(entity_df["event_timestamp"])
if not dataframe and not start_date and not end_date:
click.echo(
"Either --dataframe or --start-date and/or --end-date must be provided."
)
return

except Exception as e:
click.echo(f"Error parsing entities JSON: {e}", err=True)
if dataframe and (start_date or end_date):
click.echo("Cannot specify both --dataframe and --start-date/--end-date.")
return

entity_df = None
if dataframe:
try:
entity_list = json.loads(dataframe)
if not isinstance(entity_list, list):
raise ValueError("Entities must be a list of dictionaries.")

entity_df = pd.DataFrame(entity_list)
entity_df["event_timestamp"] = pd.to_datetime(entity_df["event_timestamp"])

except Exception as e:
click.echo(f"Error parsing entities JSON: {e}", err=True)
return

feature_vector = store.get_historical_features(
entity_df=entity_df,
features=list(features),
start_date=datetime.strptime(start_date, "%Y-%m-%d %H:%M:%S")
if start_date
else None,
end_date=datetime.strptime(end_date, "%Y-%m-%d %H:%M:%S") if end_date else None,
).to_df()

click.echo(feature_vector.to_json(orient="records", indent=4))
39 changes: 32 additions & 7 deletions sdk/python/feast/feature_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -1087,14 +1087,17 @@ def teardown(self):

def get_historical_features(
self,
entity_df: Union[pd.DataFrame, str],
features: Union[List[str], FeatureService],
entity_df: Optional[Union[pd.DataFrame, str]] = None,
features: Union[List[str], FeatureService] = [],
full_feature_names: bool = False,
start_date: Optional[datetime] = None,
end_date: Optional[datetime] = None,
) -> RetrievalJob:
"""Enrich an entity dataframe with historical feature values for either training or batch scoring.

This method joins historical feature data from one or more feature views to an entity dataframe by using a time
travel join.
travel join. Alternatively, features can be retrieved for a specific timestamp range without requiring an entity
dataframe.

Each feature view is joined to the entity dataframe using all entities configured for the respective feature
view. All configured entities must be available in the entity dataframe. Therefore, the entity dataframe must
Expand All @@ -1105,16 +1108,21 @@ def get_historical_features(
TTL may result in null values being returned.

Args:
entity_df (Union[pd.DataFrame, str]): An entity dataframe is a collection of rows containing all entity
columns (e.g., customer_id, driver_id) on which features need to be joined, as well as a event_timestamp
column used to ensure point-in-time correctness. Either a Pandas DataFrame can be provided or a string
SQL query. The query must be of a format supported by the configured offline store (e.g., BigQuery)
features: The list of features that should be retrieved from the offline store. These features can be
specified either as a list of string feature references or as a feature service. String feature
references must have format "feature_view:feature", e.g. "customer_fv:daily_transactions".
entity_df (Optional[Union[pd.DataFrame, str]]): An entity dataframe is a collection of rows containing all entity
columns (e.g., customer_id, driver_id) on which features need to be joined, as well as an event_timestamp
column used to ensure point-in-time correctness. Either a Pandas DataFrame can be provided or a string
SQL query. The query must be of a format supported by the configured offline store (e.g., BigQuery).
If not provided, features will be retrieved for the specified timestamp range without entity joins.
full_feature_names: If True, feature names will be prefixed with the corresponding feature view name,
changing them from the format "feature" to "feature_view__feature" (e.g. "daily_transactions"
changes to "customer_fv__daily_transactions").
start_date (Optional[datetime]): Start of the timestamp range when retrieving features without entity_df.
Optional; cannot be combined with entity_df.
end_date (Optional[datetime]): End of the timestamp range when retrieving features without entity_df.
Optional; cannot be combined with entity_df. Defaults to the current time when entity_df is not provided.

Returns:
RetrievalJob which can be used to materialize the results.
Expand Down Expand Up @@ -1147,6 +1155,15 @@ def get_historical_features(
... )
>>> feature_data = retrieval_job.to_df()
"""

if entity_df is not None and (start_date is not None or end_date is not None):
raise ValueError(
"Cannot specify both entity_df and start_date/end_date. Use either entity_df for entity-based retrieval or start_date/end_date for timestamp range retrieval."
)

if entity_df is None and end_date is None:
end_date = datetime.now()

_feature_refs = utils._get_features(self._registry, self.project, features)
(
all_feature_views,
Expand Down Expand Up @@ -1180,6 +1197,13 @@ def get_historical_features(
utils._validate_feature_refs(_feature_refs, full_feature_names)
provider = self._get_provider()

# Optional kwargs
kwargs = {}
if start_date is not None:
kwargs["start_date"] = start_date
if end_date is not None:
kwargs["end_date"] = end_date

job = provider.get_historical_features(
self.config,
feature_views,
Expand All @@ -1188,6 +1212,7 @@ def get_historical_features(
self._registry,
self.project,
full_feature_names,
**kwargs,
)

return job
Expand Down
Loading
Loading