1 change: 1 addition & 0 deletions sdk/python/feast/cli/cli.py
@@ -379,6 +379,7 @@ def materialize_incremental_command(ctx: click.Context, end_ts: str, views: List
             "ikv",
             "couchbase",
             "milvus",
+            "ray",
         ],
         case_sensitive=False,
     ),
12 changes: 7 additions & 5 deletions sdk/python/feast/infra/compute_engines/ray/compute.py
@@ -24,6 +24,7 @@
     RayDAGRetrievalJob,
     RayMaterializationJob,
 )
+from feast.infra.compute_engines.ray.utils import write_to_online_store
 from feast.infra.offline_stores.offline_store import RetrievalJob
 from feast.infra.registry.base_registry import BaseRegistry

@@ -203,11 +204,12 @@ def _materialize_from_offline_store(
        arrow_table = retrieval_job.to_arrow()

        # Write to online store if enabled
-        if getattr(feature_view, "online", False):
-            # TODO: Implement proper online store writing with correct data format conversion
-            logger.debug(
-                "Online store writing not implemented yet for Ray compute engine"
-            )
+        write_to_online_store(
+            arrow_table=arrow_table,
+            feature_view=feature_view,
+            online_store=self.online_store,
+            repo_config=self.repo_config,
+        )

        # Write to offline store if enabled (this handles sink_source automatically for derived views)
        if getattr(feature_view, "offline", False):
102 changes: 43 additions & 59 deletions sdk/python/feast/infra/compute_engines/ray/nodes.py
@@ -18,6 +18,10 @@
 from feast.infra.compute_engines.dag.node import DAGNode
 from feast.infra.compute_engines.dag.value import DAGValue
 from feast.infra.compute_engines.ray.config import RayComputeEngineConfig
+from feast.infra.compute_engines.ray.utils import (
+    safe_batch_processor,
+    write_to_online_store,
+)
 from feast.infra.compute_engines.utils import create_offline_store_retrieval_job
 from feast.infra.ray_shared_utils import (
     apply_field_mapping,
@@ -149,9 +153,8 @@ def execute(self, context: ExecutionContext) -> DAGValue:
        feature_df = feature_dataset.to_pandas()
        feature_ref = ray.put(feature_df)

+        @safe_batch_processor
        def join_with_aggregated_features(batch: pd.DataFrame) -> pd.DataFrame:
-            if batch.empty:
-                return batch
            features = ray.get(feature_ref)
            if join_keys:
                result = pd.merge(
@@ -226,10 +229,9 @@ def execute(self, context: ExecutionContext) -> DAGValue:
        input_value.assert_format(DAGFormat.RAY)
        dataset: Dataset = input_value.data

+        @safe_batch_processor
        def apply_filters(batch: pd.DataFrame) -> pd.DataFrame:
            """Apply TTL and custom filters to the batch."""
-            if batch.empty:
-                return batch

            filtered_batch = batch.copy()

@@ -447,11 +449,9 @@ def execute(self, context: ExecutionContext) -> DAGValue:
        input_value.assert_format(DAGFormat.RAY)
        dataset: Dataset = input_value.data

+        @safe_batch_processor
        def deduplicate_batch(batch: pd.DataFrame) -> pd.DataFrame:
            """Remove duplicates from the batch."""
-            if batch.empty:
-                return batch
-
            # Get deduplication keys
            join_keys = self.column_info.join_keys
            timestamp_col = self.column_info.timestamp_column
@@ -518,27 +518,21 @@ def execute(self, context: ExecutionContext) -> DAGValue:
        elif callable(self.transformation):
            transformation_serialized = dill.dumps(self.transformation)

+        @safe_batch_processor
        def apply_transformation_with_serialized_udf(
            batch: pd.DataFrame,
        ) -> pd.DataFrame:
            """Apply the transformation using pre-serialized UDF."""
-            if batch.empty:
-                return batch
-
-            try:
-                if transformation_serialized:
-                    transformation_func = dill.loads(transformation_serialized)
-                    transformed_batch = transformation_func(batch)
-                else:
-                    logger.warning(
-                        "No serialized transformation available, returning original batch"
-                    )
-                    transformed_batch = batch
-
-                return transformed_batch
-            except Exception as e:
-                logger.error(f"Transformation failed: {e}")
-                return batch
+            if transformation_serialized:
+                transformation_func = dill.loads(transformation_serialized)
+                transformed_batch = transformation_func(batch)
+            else:
+                logger.warning(
+                    "No serialized transformation available, returning original batch"
+                )
+                transformed_batch = batch
+
+            return transformed_batch

        transformed_dataset = dataset.map_batches(
            apply_transformation_with_serialized_udf, batch_format="pandas"
@@ -645,46 +639,36 @@ def execute(self, context: ExecutionContext) -> DAGValue:
            feature_view=self.feature_view, repo_config=context.repo_config
        )

+        @safe_batch_processor
        def write_batch_with_serialized_artifacts(batch: pd.DataFrame) -> pd.DataFrame:
            """Write each batch using pre-serialized artifacts."""
-            if batch.empty:
-                return batch
-
-            try:
-                (
-                    feature_view,
-                    online_store,
-                    offline_store,
-                    repo_config,
-                ) = serialized_artifacts.unserialize()
-
-                arrow_table = pa.Table.from_pandas(batch)
-
-                # Write to online store if enabled
-                if getattr(feature_view, "online", False):
-                    # TODO: Implement proper online store writing with correct data format conversion
-                    logger.debug(
-                        "Online store writing not implemented yet for Ray compute engine"
-                    )
-
-                # Write to offline store if enabled
-                if getattr(feature_view, "offline", False):
-                    try:
-                        offline_store.offline_write_batch(
-                            config=repo_config,
-                            feature_view=feature_view,
-                            table=arrow_table,
-                            progress=lambda x: None,
-                        )
-                    except Exception as e:
-                        logger.error(f"Failed to write to offline store: {e}")
-                        raise
-
-                return batch
-            except Exception as e:
-                logger.error(f"Write operation failed: {e}")
-                raise
+            (
+                feature_view,
+                online_store,
+                offline_store,
+                repo_config,
+            ) = serialized_artifacts.unserialize()
+
+            arrow_table = pa.Table.from_pandas(batch)
+
+            # Write to online store if enabled
+            write_to_online_store(
+                arrow_table=arrow_table,
+                feature_view=feature_view,
+                online_store=online_store,
+                repo_config=repo_config,
+            )
+
+            # Write to offline store if enabled
+            if getattr(feature_view, "offline", False):
+                offline_store.offline_write_batch(
+                    config=repo_config,
+                    feature_view=feature_view,
+                    table=arrow_table,
+                    progress=lambda x: None,
+                )
+
+            return batch

        written_dataset = dataset.map_batches(
            write_batch_with_serialized_artifacts, batch_format="pandas"
93 changes: 93 additions & 0 deletions sdk/python/feast/infra/compute_engines/ray/utils.py
@@ -0,0 +1,93 @@
"""
Utility functions for Ray compute engine.
"""

import logging
from typing import Callable, Dict, Union

import pandas as pd
import pyarrow as pa

from feast.batch_feature_view import BatchFeatureView
from feast.feature_view import FeatureView
from feast.infra.online_stores.online_store import OnlineStore
from feast.repo_config import RepoConfig
from feast.stream_feature_view import StreamFeatureView
from feast.utils import _convert_arrow_to_proto
from feast.value_type import ValueType

logger = logging.getLogger(__name__)


def write_to_online_store(
arrow_table: pa.Table,
feature_view: Union[BatchFeatureView, StreamFeatureView, FeatureView],
online_store: OnlineStore,
repo_config: RepoConfig,
) -> None:
"""
Writes Arrow table data to the online store.

Args:
arrow_table: Arrow table containing the data to write
feature_view: Feature view being materialized
online_store: Online store instance
repo_config: Repository configuration
"""
if not getattr(feature_view, "online", False):
return

try:
join_key_to_value_type: Dict[str, ValueType] = {}
if hasattr(feature_view, "entity_columns") and feature_view.entity_columns:
join_key_to_value_type = {
entity.name: entity.dtype.to_value_type()
for entity in feature_view.entity_columns
}

rows_to_write = _convert_arrow_to_proto(
arrow_table, feature_view, join_key_to_value_type
)

if rows_to_write:
online_store.online_write_batch(
config=repo_config,
table=feature_view,
data=rows_to_write,
progress=lambda x: None,
)
logger.debug(
f"Successfully wrote {len(rows_to_write)} rows to online store for {feature_view.name}"
)
else:
logger.warning(f"No rows to write for {feature_view.name}")

except Exception as e:
logger.error(f"Failed to write to online store for {feature_view.name}: {e}")


def safe_batch_processor(
func: Callable[[pd.DataFrame], pd.DataFrame],
) -> Callable[[pd.DataFrame], pd.DataFrame]:
"""
Decorator for batch processing functions that handles empty batches and errors gracefully.

Args:
func: Function that processes a pandas DataFrame batch

Returns:
Wrapped function that handles empty batches and exceptions
"""

def wrapper(batch: pd.DataFrame) -> pd.DataFrame:
# Handle empty batches
if batch.empty:
return batch

try:
return func(batch)
except Exception as e:
logger.error(f"Batch processing failed in {func.__name__}: {e}")
return batch

return wrapper
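
Taken together, these helpers are meant to slot into Ray `Dataset.map_batches` pipelines, exactly as the `nodes.py` changes above use them. A minimal standalone sketch of `safe_batch_processor` in such a pipeline — the toy dataset and column name are illustrative only, not part of this change:

```python
import pandas as pd
import ray

from feast.infra.compute_engines.ray.utils import safe_batch_processor


@safe_batch_processor
def double_value(batch: pd.DataFrame) -> pd.DataFrame:
    # Empty batches are passed through untouched; if this body raises,
    # the decorator logs the error and returns the original batch.
    batch["value"] = batch["value"] * 2
    return batch


ds = ray.data.from_pandas(pd.DataFrame({"value": [1, 2, 3]}))
print(ds.map_batches(double_value, batch_format="pandas").to_pandas())
```

Because the wrapper swallows exceptions and returns the input batch, failed batches flow through the pipeline unmodified rather than aborting the job — the same trade-off the TransformationNode and WriteNode now rely on.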
41 changes: 41 additions & 0 deletions sdk/python/feast/templates/ray/README.md
@@ -0,0 +1,41 @@
# Feast Ray Template

This template demonstrates Feast's Ray integration, showcasing both the **Ray Offline Store** and **Ray Compute Engine** capabilities for distributed feature processing.

## What's Included

```
ray_template/
β”œβ”€β”€ feature_repo/
β”‚   β”œβ”€β”€ feature_store.yaml        # Ray offline store + compute engine config
β”‚   β”œβ”€β”€ example_repo.py           # Feature definitions with Ray optimizations
β”‚   β”œβ”€β”€ test_workflow.py          # Demo script showing Ray capabilities
β”‚   └── data/                     # Sample datasets (generated by bootstrap)
β”‚       β”œβ”€β”€ driver_stats.parquet
β”‚       └── customer_daily_profile.parquet
└── README.md                     # This file
```
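
The `feature_store.yaml` is what wires the two Ray pieces together. A minimal sketch of such a configuration — illustrative only; the exact `type` strings and the `batch_engine` block follow Feast's usual config layout and are assumptions, not copied from this PR:

```yaml
project: my_ray_project
registry: data/registry.db
provider: local
offline_store:
  type: ray        # Ray offline store (assumed type string)
batch_engine:
  type: ray        # Ray compute engine (assumed type string)
online_store:
  type: sqlite
  path: data/online_store.db
```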


## Getting Started

1. **Initialize the template**:
   ```bash
   feast init -t ray my_ray_project
   cd my_ray_project/feature_repo
   ```

2. **Install Ray dependencies**:
   ```bash
   pip install feast[ray]
   ```

3. **Apply feature definitions**:
   ```bash
   feast apply
   ```

4. **Run the demo**:
   ```bash
   python test_workflow.py
   ```