Skip to content
Closed
1 change: 1 addition & 0 deletions docs/getting-started/genai.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ The transformation workflow typically involves:
3. **Chunking**: Split documents into smaller, semantically meaningful chunks
4. **Embedding Generation**: Convert text chunks into vector embeddings
5. **Storage**: Store embeddings and metadata in Feast's feature store

### Feature Transformation for LLMs

Feast supports transformations that can be used to:
Expand Down
93 changes: 61 additions & 32 deletions sdk/python/feast/feature_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -656,7 +656,7 @@ def _make_inferences(
def _get_feature_views_to_materialize(
self,
feature_views: Optional[List[str]],
) -> List[FeatureView]:
) -> List[Union[FeatureView, OnDemandFeatureView]]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting that we are returning only FeatureView/OnDemandFeatureView, but in reality when feature_views are not given, we are returning everything including Stream Feature Views.

The return list needs to change here.

"""
Returns the list of feature views that should be materialized.

Expand All @@ -669,34 +669,53 @@ def _get_feature_views_to_materialize(
FeatureViewNotFoundException: One of the specified feature views could not be found.
ValueError: One of the specified feature views is not configured for materialization.
"""
feature_views_to_materialize: List[FeatureView] = []
feature_views_to_materialize: List[Union[FeatureView, OnDemandFeatureView]] = []

if feature_views is None:
feature_views_to_materialize = utils._list_feature_views(
regular_feature_views = utils._list_feature_views(
self._registry, self.project, hide_dummy_entity=False
)
feature_views_to_materialize = [
fv for fv in feature_views_to_materialize if fv.online
]
feature_views_to_materialize.extend(
[fv for fv in regular_feature_views if fv.online]
)
stream_feature_views_to_materialize = self._list_stream_feature_views(
hide_dummy_entity=False
)
feature_views_to_materialize += [
sfv for sfv in stream_feature_views_to_materialize if sfv.online
]
feature_views_to_materialize.extend(
[sfv for sfv in stream_feature_views_to_materialize if sfv.online]
)
on_demand_feature_views_to_materialize = self.list_on_demand_feature_views()
feature_views_to_materialize.extend(
[
odfv
for odfv in on_demand_feature_views_to_materialize
if odfv.write_to_online_store
]
)
else:
for name in feature_views:
feature_view: Union[FeatureView, OnDemandFeatureView]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could `_get_feature_views_to_materialize` accept one more parameter that controls which types of feature views are returned, depending on the caller's purpose?

It's really confusing that the `if` and `else` blocks return lists of different feature view types.

try:
feature_view = self._get_feature_view(name, hide_dummy_entity=False)
except FeatureViewNotFoundException:
feature_view = self._get_stream_feature_view(
name, hide_dummy_entity=False
)
try:
feature_view = self._get_stream_feature_view(
name, hide_dummy_entity=False
)
except FeatureViewNotFoundException:
feature_view = self.get_on_demand_feature_view(name)

if not feature_view.online:
if hasattr(feature_view, "online") and not feature_view.online:
raise ValueError(
f"FeatureView {feature_view.name} is not configured to be served online."
)
elif (
hasattr(feature_view, "write_to_online_store")
and not feature_view.write_to_online_store
):
raise ValueError(
f"OnDemandFeatureView {feature_view.name} is not configured for write_to_online_store."
)
feature_views_to_materialize.append(feature_view)

return feature_views_to_materialize
Expand Down Expand Up @@ -866,7 +885,8 @@ def apply(
views_to_update = [
ob
for ob in objects
if (
if
(
# BFVs are not handled separately from FVs right now.
(isinstance(ob, FeatureView) or isinstance(ob, BatchFeatureView))
and not isinstance(ob, StreamFeatureView)
Expand Down Expand Up @@ -1312,6 +1332,11 @@ def materialize_incremental(
)
# TODO paging large loads
for feature_view in feature_views_to_materialize:
from feast.on_demand_feature_view import OnDemandFeatureView
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To avoid importing this twice, can we put the import at the top of the file?


if isinstance(feature_view, OnDemandFeatureView):
continue
Comment on lines +1337 to +1338
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For my knowledge:
What does it mean that the ODFV cannot be materialized?


start_date = feature_view.most_recent_end_time
if start_date is None:
if feature_view.ttl is None:
Expand Down Expand Up @@ -1352,12 +1377,13 @@ def tqdm_builder(length):
tqdm_builder=tqdm_builder,
)

self._registry.apply_materialization(
feature_view,
self.project,
start_date,
end_date,
)
if not isinstance(feature_view, OnDemandFeatureView):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we already `continue` the loop at lines 1337-1338, we don't need this condition here.

self._registry.apply_materialization(
feature_view,
self.project,
start_date,
end_date,
)

def materialize(
self,
Expand Down Expand Up @@ -1407,6 +1433,8 @@ def materialize(
)
# TODO paging large loads
for feature_view in feature_views_to_materialize:
from feast.on_demand_feature_view import OnDemandFeatureView

provider = self._get_provider()
print(f"{Style.BRIGHT + Fore.GREEN}{feature_view.name}{Style.RESET_ALL}:")

Expand All @@ -1426,12 +1454,13 @@ def tqdm_builder(length):
tqdm_builder=tqdm_builder,
)

self._registry.apply_materialization(
feature_view,
self.project,
start_date,
end_date,
)
if not isinstance(feature_view, OnDemandFeatureView):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than waiting until this line for the ODFV-based skip, `continue` the loop before getting the provider (line 1438).

That would keep the handling uniform.

self._registry.apply_materialization(
feature_view,
self.project,
start_date,
end_date,
)

def _fvs_for_push_source_or_raise(
self, push_source_name: str, allow_cache: bool
Expand Down Expand Up @@ -2033,9 +2062,9 @@ def retrieve_online_documents_v2(
distance_metric: The distance metric to use for retrieval.
query_string: The query string to retrieve the closest document features using keyword search (bm25).
"""
assert query is not None or query_string is not None, (
"Either query or query_string must be provided."
)
assert (
query is not None or query_string is not None
), "Either query or query_string must be provided."

(
available_feature_views,
Expand Down Expand Up @@ -2348,9 +2377,9 @@ def write_logged_features(
if not isinstance(source, FeatureService):
raise ValueError("Only feature service is currently supported as a source")

assert source.logging_config is not None, (
"Feature service must be configured with logging config in order to use this functionality"
)
assert (
source.logging_config is not None
), "Feature service must be configured with logging config in order to use this functionality"

assert isinstance(logs, (pa.Table, Path))

Expand Down
6 changes: 3 additions & 3 deletions sdk/python/feast/feature_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,9 +196,9 @@ def __init__(
else:
features.append(field)

assert len([f for f in features if f.vector_index]) < 2, (
f"Only one vector feature is allowed per feature view. Please update {self.name}."
)
assert (
len([f for f in features if f.vector_index]) < 2
), f"Only one vector feature is allowed per feature view. Please update {self.name}."

# TODO(felixwang9817): Add more robust validation of features.
cols = [field.name for field in schema]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,9 @@ def __init__(
online_store: OnlineStore,
**kwargs,
):
assert repo_config.offline_store.type == "snowflake.offline", (
"To use Snowflake Compute Engine, you must use Snowflake as an offline store."
)
assert (
repo_config.offline_store.type == "snowflake.offline"
), "To use Snowflake Compute Engine, you must use Snowflake as an offline store."

super().__init__(
repo_config=repo_config,
Expand All @@ -210,11 +210,10 @@ def _materialize_one(
project = task.project
tqdm_builder = task.tqdm_builder if task.tqdm_builder else tqdm

assert isinstance(feature_view, BatchFeatureView) or isinstance(
feature_view, FeatureView
), (
"Snowflake can only materialize FeatureView & BatchFeatureView feature view types."
)
assert (
isinstance(feature_view, BatchFeatureView)
or isinstance(feature_view, FeatureView)
), "Snowflake can only materialize FeatureView & BatchFeatureView feature view types."

entities = []
for entity_name in feature_view.entities:
Expand Down
4 changes: 3 additions & 1 deletion sdk/python/feast/infra/offline_stores/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,9 @@ def evaluate_historical_retrieval():
):
# Make sure all event timestamp fields are tz-aware. We default tz-naive fields to UTC
entity_df_with_features[entity_df_event_timestamp_col] = (
entity_df_with_features[entity_df_event_timestamp_col].apply(
entity_df_with_features[
entity_df_event_timestamp_col
].apply(
lambda x: x
if x.tzinfo is not None
else x.replace(tzinfo=timezone.utc)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -326,9 +326,7 @@ def online_read(
assert all(
field in [f["name"] for f in collection["fields"]]
for field in output_fields
), (
f"field(s) [{[field for field in output_fields if field not in [f['name'] for f in collection['fields']]]}] not found in collection schema"
)
), f"field(s) [{[field for field in output_fields if field not in [f['name'] for f in collection['fields']]]}] not found in collection schema"
composite_entities = []
for entity_key in entity_keys:
entity_key_str = serialize_entity_key(
Expand Down Expand Up @@ -522,9 +520,7 @@ def retrieve_online_documents_v2(
assert all(
field in [f["name"] for f in collection["fields"]]
for field in output_fields
), (
f"field(s) [{[field for field in output_fields if field not in [f['name'] for f in collection['fields']]]}] not found in collection schema"
)
), f"field(s) [{[field for field in output_fields if field not in [f['name'] for f in collection['fields']]]}] not found in collection schema"

# Find the vector search field if we need it
ann_search_field = None
Expand Down
6 changes: 3 additions & 3 deletions sdk/python/feast/infra/online_stores/online_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,9 +460,9 @@ def retrieve_online_documents_v2(
where the first item is the event timestamp for the row, and the second item is a dict of feature
name to embeddings.
"""
assert embedding is not None or query_string is not None, (
"Either embedding or query_string must be specified"
)
assert (
embedding is not None or query_string is not None
), "Either embedding or query_string must be specified"
raise NotImplementedError(
f"Online store {self.__class__.__name__} does not support online retrieval"
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,9 @@ def _get_client(self, config: RepoConfig) -> QdrantClient:
if self._client:
return self._client
online_store_config = config.online_store
assert isinstance(online_store_config, QdrantOnlineStoreConfig), (
"Invalid type for online store config"
)
assert isinstance(
online_store_config, QdrantOnlineStoreConfig
), "Invalid type for online store config"

assert online_store_config.similarity and (
online_store_config.similarity.lower() in DISTANCE_MAPPING
Expand Down
12 changes: 6 additions & 6 deletions sdk/python/feast/infra/online_stores/sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -790,12 +790,12 @@ def _get_vector_field(table: FeatureView) -> str:
vector_fields: List[Field] = [
f for f in table.features if getattr(f, "vector_index", None)
]
assert len(vector_fields) > 0, (
f"No vector field found, please update feature view = {table.name} to declare a vector field"
)
assert len(vector_fields) < 2, (
"Only one vector field is supported, please update feature view = {table.name} to declare one vector field"
)
assert (
len(vector_fields) > 0
), f"No vector field found, please update feature view = {table.name} to declare a vector field"
assert (
len(vector_fields) < 2
), "Only one vector field is supported, please update feature view = {table.name} to declare one vector field"
vector_field: str = vector_fields[0].name
return vector_field

Expand Down
26 changes: 18 additions & 8 deletions sdk/python/feast/infra/passthrough_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import pyarrow as pa
from tqdm import tqdm

from feast import OnDemandFeatureView, importer
from feast import importer
from feast.base_feature_view import BaseFeatureView
from feast.batch_feature_view import BatchFeatureView
from feast.data_source import DataSource
Expand All @@ -38,6 +38,7 @@
from feast.infra.provider import Provider
from feast.infra.registry.base_registry import BaseRegistry
from feast.infra.supported_async_methods import ProviderAsyncMethods
from feast.on_demand_feature_view import OnDemandFeatureView
from feast.online_response import OnlineResponse
from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto
from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto
Expand Down Expand Up @@ -420,13 +421,22 @@ def ingest_df_to_offline_store(self, feature_view: FeatureView, table: pa.Table)
def materialize_single_feature_view(
self,
config: RepoConfig,
feature_view: FeatureView,
feature_view: Union[FeatureView, OnDemandFeatureView],
start_date: datetime,
end_date: datetime,
registry: BaseRegistry,
project: str,
tqdm_builder: Callable[[int], tqdm],
) -> None:
from feast.on_demand_feature_view import OnDemandFeatureView
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are repeating the import.


if isinstance(feature_view, OnDemandFeatureView):
if not feature_view.write_to_online_store:
raise ValueError(
f"OnDemandFeatureView {feature_view.name} does not have write_to_online_store enabled"
)
return

assert (
isinstance(feature_view, BatchFeatureView)
or isinstance(feature_view, StreamFeatureView)
Expand Down Expand Up @@ -496,9 +506,9 @@ def write_feature_service_logs(
config: RepoConfig,
registry: BaseRegistry,
):
assert feature_service.logging_config is not None, (
"Logging should be configured for the feature service before calling this function"
)
assert (
feature_service.logging_config is not None
), "Logging should be configured for the feature service before calling this function"

self.offline_store.write_logged_features(
config=config,
Expand All @@ -516,9 +526,9 @@ def retrieve_feature_service_logs(
config: RepoConfig,
registry: BaseRegistry,
) -> RetrievalJob:
assert feature_service.logging_config is not None, (
"Logging should be configured for the feature service before calling this function"
)
assert (
feature_service.logging_config is not None
), "Logging should be configured for the feature service before calling this function"

logging_source = FeatureServiceLoggingSource(feature_service, config.project)
schema = logging_source.get_schema(registry)
Expand Down
2 changes: 1 addition & 1 deletion sdk/python/feast/infra/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def ingest_df_to_offline_store(
def materialize_single_feature_view(
self,
config: RepoConfig,
feature_view: FeatureView,
feature_view: Union[FeatureView, OnDemandFeatureView],
start_date: datetime,
end_date: datetime,
registry: BaseRegistry,
Expand Down
Loading
Loading