Skip to content

Commit 4b801ba

Browse files
committed
update backend
Signed-off-by: HaoXuAI <sduxuhao@gmail.com>
1 parent 2dd267b commit 4b801ba

File tree

7 files changed

+61
-28
lines changed

7 files changed

+61
-28
lines changed

docs/reference/compute-engine/README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,11 @@ This system builds and executes DAGs (Directed Acyclic Graphs) of typed operatio
3131
- Supports point-in-time joins and large-scale materialization
3232
- Integrates with `SparkOfflineStore` and `SparkMaterializationJob`
3333

34-
### 🧪 LocalComputeEngine (WIP)
34+
### 🧪 LocalComputeEngine
3535

36-
- Runs on Arrow + Pandas (or optionally DuckDB)
36+
- Runs on Arrow + a specified backend (e.g., Pandas, Polars)
3737
- Designed for local dev, testing, or lightweight feature generation
38+
- Supports `LocalMaterializationJob` and `LocalHistoricalRetrievalJob`
3839

3940
---
4041

sdk/python/feast/infra/compute_engines/base.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44
import pyarrow as pa
55

66
from feast import RepoConfig
7-
from feast.infra.compute_engines.dag.context import ColumnInfo, ExecutionContext
8-
from feast.infra.compute_engines.tasks import HistoricalRetrievalTask
9-
from feast.infra.materialization.batch_materialization_engine import (
7+
from feast.infra.common.materialization_job import (
108
MaterializationJob,
119
MaterializationTask,
1210
)
11+
from feast.infra.common.retrieval_task import HistoricalRetrievalTask
12+
from feast.infra.compute_engines.dag.context import ColumnInfo, ExecutionContext
1313
from feast.infra.offline_stores.offline_store import OfflineStore
1414
from feast.infra.online_stores.online_store import OnlineStore
1515
from feast.infra.registry.registry import Registry

sdk/python/feast/infra/compute_engines/feature_builder.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
from abc import ABC, abstractmethod
22
from typing import Union
33

4+
from feast.infra.common.materialization_job import MaterializationTask
5+
from feast.infra.common.retrieval_task import HistoricalRetrievalTask
46
from feast.infra.compute_engines.dag.node import DAGNode
57
from feast.infra.compute_engines.dag.plan import ExecutionPlan
6-
from feast.infra.compute_engines.tasks import HistoricalRetrievalTask
7-
from feast.infra.materialization.batch_materialization_engine import MaterializationTask
88

99

1010
class FeatureBuilder(ABC):

sdk/python/feast/infra/compute_engines/local/backends/base.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,53 @@
33

44

55
class DataFrameBackend(ABC):
6+
"""
7+
Abstract interface for DataFrame operations used by the LocalComputeEngine.
8+
9+
This interface defines the contract for implementing pluggable DataFrame backends
10+
such as Pandas, Polars, or DuckDB. Each backend must support core table operations
11+
such as joins, filtering, aggregation, conversion to/from Arrow, and deduplication.
12+
13+
The purpose of this abstraction is to allow seamless swapping of execution backends
14+
without changing DAGNode or ComputeEngine logic. All nodes operate on pyarrow.Table
15+
as the standard input/output format, while the backend defines how the computation
16+
is actually performed.
17+
18+
Expected implementations include:
19+
- PandasBackend
20+
- PolarsBackend
21+
- DuckDBBackend (future)
22+
23+
Methods
24+
-------
25+
from_arrow(table: pa.Table) -> Any
26+
Convert a pyarrow.Table to the backend-native DataFrame format.
27+
28+
to_arrow(df: Any) -> pa.Table
29+
Convert a backend-native DataFrame to pyarrow.Table.
30+
31+
join(left: Any, right: Any, on: List[str], how: str) -> Any
32+
Join two dataframes on specified keys with given join type.
33+
34+
groupby_agg(df: Any, group_keys: List[str], agg_ops: Dict[str, Tuple[str, str]]) -> Any
35+
Group and aggregate the dataframe. `agg_ops` maps output column names
36+
to (aggregation function, source column name) pairs.
37+
38+
filter(df: Any, expr: str) -> Any
39+
Apply a filter expression (string-based) to the DataFrame.
40+
41+
to_timedelta_value(delta: timedelta) -> Any
42+
Convert a Python timedelta object to a backend-compatible value
43+
that can be subtracted from a timestamp column.
44+
45+
drop_duplicates(df: Any, keys: List[str], sort_by: List[str], ascending: bool = False) -> Any
46+
Deduplicate the DataFrame by key columns, keeping the first row
47+
by descending or ascending sort order.
48+
49+
rename_columns(df: Any, columns: Dict[str, str]) -> Any
50+
Rename columns in the DataFrame according to the provided mapping.
51+
"""
52+
653
@abstractmethod
754
def columns(self, df): ...
855

sdk/python/feast/infra/compute_engines/local/backends/factory.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@
88

99

1010
class BackendFactory:
11+
"""
12+
Factory class for constructing DataFrameBackend implementations based on backend name
13+
or runtime entity_df type.
14+
"""
15+
1116
@staticmethod
1217
def from_name(name: str) -> DataFrameBackend:
1318
if name == "pandas":

sdk/python/feast/infra/compute_engines/local/config.py

Lines changed: 0 additions & 20 deletions
This file was deleted.

sdk/python/tests/integration/compute_engines/spark/test_compute.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
MaterializationJobStatus,
1515
MaterializationTask,
1616
)
17-
from feast.infra.common.retrieval_job import HistoricalRetrievalTask
17+
from feast.infra.common.retrieval_task import HistoricalRetrievalTask
1818
from feast.infra.compute_engines.spark.compute import SparkComputeEngine
1919
from feast.infra.compute_engines.spark.job import SparkDAGRetrievalJob
2020
from feast.infra.offline_stores.contrib.spark_offline_store.spark import (

0 commit comments

Comments
 (0)