Skip to content

Commit 4b801ba

Browse files
committed
update backend
Signed-off-by: HaoXuAI <sduxuhao@gmail.com>
1 parent 2dd267b commit 4b801ba

File tree

7 files changed

+61
-28
lines changed

7 files changed

+61
-28
lines changed

docs/reference/compute-engine/README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,11 @@ This system builds and executes DAGs (Directed Acyclic Graphs) of typed operatio
3131
- Supports point-in-time joins and large-scale materialization
3232
- Integrates with `SparkOfflineStore` and `SparkMaterializationJob`
3333

34-
### 🧪 LocalComputeEngine (WIP)
34+
### 🧪 LocalComputeEngine
3535

36-
- Runs on Arrow + Pandas (or optionally DuckDB)
36+
- Runs on Arrow + a specified backend (e.g., Pandas, Polars)
3737
- Designed for local dev, testing, or lightweight feature generation
38+
- Supports `LocalMaterializationJob` and `LocalHistoricalRetrievalJob`
3839

3940
---
4041

sdk/python/feast/infra/compute_engines/base.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44
import pyarrow as pa
55

66
from feast import RepoConfig
7-
from feast.infra.compute_engines.dag.context import ColumnInfo, ExecutionContext
8-
from feast.infra.compute_engines.tasks import HistoricalRetrievalTask
9-
from feast.infra.materialization.batch_materialization_engine import (
7+
from feast.infra.common.materialization_job import (
108
MaterializationJob,
119
MaterializationTask,
1210
)
11+
from feast.infra.common.retrieval_task import HistoricalRetrievalTask
12+
from feast.infra.compute_engines.dag.context import ColumnInfo, ExecutionContext
1313
from feast.infra.offline_stores.offline_store import OfflineStore
1414
from feast.infra.online_stores.online_store import OnlineStore
1515
from feast.infra.registry.registry import Registry

sdk/python/feast/infra/compute_engines/feature_builder.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
from abc import ABC, abstractmethod
22
from typing import Union
33

4+
from feast.infra.common.materialization_job import MaterializationTask
5+
from feast.infra.common.retrieval_task import HistoricalRetrievalTask
46
from feast.infra.compute_engines.dag.node import DAGNode
57
from feast.infra.compute_engines.dag.plan import ExecutionPlan
6-
from feast.infra.compute_engines.tasks import HistoricalRetrievalTask
7-
from feast.infra.materialization.batch_materialization_engine import MaterializationTask
88

99

1010
class FeatureBuilder(ABC):

sdk/python/feast/infra/compute_engines/local/backends/base.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,53 @@
33

44

55
class DataFrameBackend(ABC):
6+
"""
7+
Abstract interface for DataFrame operations used by the LocalComputeEngine.
8+
9+
This interface defines the contract for implementing pluggable DataFrame backends
10+
such as Pandas, Polars, or DuckDB. Each backend must support core table operations
11+
such as joins, filtering, aggregation, conversion to/from Arrow, and deduplication.
12+
13+
The purpose of this abstraction is to allow seamless swapping of execution backends
14+
without changing DAGNode or ComputeEngine logic. All nodes operate on pyarrow.Table
15+
as the standard input/output format, while the backend defines how the computation
16+
is actually performed.
17+
18+
Expected implementations include:
19+
- PandasBackend
20+
- PolarsBackend
21+
- DuckDBBackend (future)
22+
23+
Methods
24+
-------
25+
from_arrow(table: pa.Table) -> Any
26+
Convert a pyarrow.Table to the backend-native DataFrame format.
27+
28+
to_arrow(df: Any) -> pa.Table
29+
Convert a backend-native DataFrame to pyarrow.Table.
30+
31+
join(left: Any, right: Any, on: List[str], how: str) -> Any
32+
Join two dataframes on specified keys with given join type.
33+
34+
groupby_agg(df: Any, group_keys: List[str], agg_ops: Dict[str, Tuple[str, str]]) -> Any
35+
Group and aggregate the dataframe. `agg_ops` maps output column names
36+
to (aggregation function, source column name) pairs.
37+
38+
filter(df: Any, expr: str) -> Any
39+
Apply a filter expression (string-based) to the DataFrame.
40+
41+
to_timedelta_value(delta: timedelta) -> Any
42+
Convert a Python timedelta object to a backend-compatible value
43+
that can be subtracted from a timestamp column.
44+
45+
drop_duplicates(df: Any, keys: List[str], sort_by: List[str], ascending: bool = False) -> Any
46+
Deduplicate the DataFrame by key columns, keeping the first row
47+
by descending or ascending sort order.
48+
49+
rename_columns(df: Any, columns: Dict[str, str]) -> Any
50+
Rename columns in the DataFrame according to the provided mapping.
51+
"""
52+
653
@abstractmethod
754
def columns(self, df): ...
855

sdk/python/feast/infra/compute_engines/local/backends/factory.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@
88

99

1010
class BackendFactory:
11+
"""
12+
Factory class for constructing DataFrameBackend implementations based on backend name
13+
or runtime entity_df type.
14+
"""
15+
1116
@staticmethod
1217
def from_name(name: str) -> DataFrameBackend:
1318
if name == "pandas":

sdk/python/feast/infra/compute_engines/local/config.py

Lines changed: 0 additions & 20 deletions
This file was deleted.

sdk/python/tests/integration/compute_engines/spark/test_compute.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
MaterializationJobStatus,
1515
MaterializationTask,
1616
)
17-
from feast.infra.common.retrieval_job import HistoricalRetrievalTask
17+
from feast.infra.common.retrieval_task import HistoricalRetrievalTask
1818
from feast.infra.compute_engines.spark.compute import SparkComputeEngine
1919
from feast.infra.compute_engines.spark.job import SparkDAGRetrievalJob
2020
from feast.infra.offline_stores.contrib.spark_offline_store.spark import (

0 commit comments

Comments
 (0)