Skip to content

Commit 467a5b9

Browse files
committed
move old materialization engines into compute engines
Signed-off-by: HaoXuAI <sduxuhao@gmail.com>
1 parent 1729394 commit 467a5b9

11 files changed

Lines changed: 208 additions & 218 deletions

File tree

sdk/python/feast/infra/compute_engines/aws_lambda/lambda_engine.py

Lines changed: 35 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -3,13 +3,12 @@
33
import logging
44
from concurrent.futures import ThreadPoolExecutor, wait
55
from dataclasses import dataclass
6-
from datetime import datetime
7-
from typing import Callable, List, Literal, Optional, Sequence, Union
6+
from typing import Literal, Optional, Sequence, Union
87

98
import boto3
109
import pyarrow as pa
10+
from infra.common.retrieval_task import HistoricalRetrievalTask
1111
from pydantic import StrictStr
12-
from tqdm import tqdm
1312

1413
from feast import utils
1514
from feast.batch_feature_view import BatchFeatureView
@@ -21,6 +20,7 @@
2120
MaterializationJobStatus,
2221
MaterializationTask,
2322
)
23+
from feast.infra.compute_engines.base import ComputeEngine
2424
from feast.infra.offline_stores.offline_store import OfflineStore
2525
from feast.infra.online_stores.online_store import OnlineStore
2626
from feast.infra.registry.base_registry import BaseRegistry
@@ -29,8 +29,6 @@
2929
from feast.stream_feature_view import StreamFeatureView
3030
from feast.utils import _get_column_names
3131
from feast.version import get_version
32-
from feast.infra.compute_engines.base import ComputeEngine
33-
from infra.common.retrieval_task import HistoricalRetrievalTask
3432

3533
DEFAULT_BATCH_SIZE = 10_000
3634

@@ -52,9 +50,7 @@ class LambdaComputeEngineConfig(FeastConfigBaseModel):
5250

5351
@dataclass
5452
class LambdaMaterializationJob(MaterializationJob):
55-
def __init__(self,
56-
job_id: str,
57-
status: MaterializationJobStatus) -> None:
53+
def __init__(self, job_id: str, status: MaterializationJobStatus) -> None:
5854
super().__init__()
5955
self._job_id: str = job_id
6056
self._status = status
@@ -81,24 +77,24 @@ class LambdaComputeEngine(ComputeEngine):
8177
WARNING: This engine should be considered "Alpha" functionality.
8278
"""
8379

84-
def get_historical_features(self,
85-
registry: BaseRegistry,
86-
task: HistoricalRetrievalTask) -> pa.Table:
80+
def get_historical_features(
81+
self, registry: BaseRegistry, task: HistoricalRetrievalTask
82+
) -> pa.Table:
8783
raise NotImplementedError(
8884
"Lambda Compute Engine does not support get_historical_features"
8985
)
9086

9187
def update(
92-
self,
93-
project: str,
94-
views_to_delete: Sequence[
95-
Union[BatchFeatureView, StreamFeatureView, FeatureView, OnDemandFeatureView]
96-
],
97-
views_to_keep: Sequence[
98-
Union[BatchFeatureView, StreamFeatureView, FeatureView, OnDemandFeatureView]
99-
],
100-
entities_to_delete: Sequence[Entity],
101-
entities_to_keep: Sequence[Entity],
88+
self,
89+
project: str,
90+
views_to_delete: Sequence[
91+
Union[BatchFeatureView, StreamFeatureView, FeatureView, OnDemandFeatureView]
92+
],
93+
views_to_keep: Sequence[
94+
Union[BatchFeatureView, StreamFeatureView, FeatureView, OnDemandFeatureView]
95+
],
96+
entities_to_delete: Sequence[Entity],
97+
entities_to_keep: Sequence[Entity],
10298
):
10399
# This should be setting up the lambda function.
104100
r = self.lambda_client.create_function(
@@ -124,23 +120,23 @@ def update(
124120
waiter.wait(FunctionName=self.lambda_name)
125121

126122
def teardown_infra(
127-
self,
128-
project: str,
129-
fvs: Sequence[Union[BatchFeatureView, StreamFeatureView, FeatureView]],
130-
entities: Sequence[Entity],
123+
self,
124+
project: str,
125+
fvs: Sequence[Union[BatchFeatureView, StreamFeatureView, FeatureView]],
126+
entities: Sequence[Entity],
131127
):
132128
# This should be tearing down the lambda function.
133129
logger.info("Tearing down lambda %s", self.lambda_name)
134130
r = self.lambda_client.delete_function(FunctionName=self.lambda_name)
135131
logger.info("Finished tearing down lambda %s: %s", self.lambda_name, r)
136132

137133
def __init__(
138-
self,
139-
*,
140-
repo_config: RepoConfig,
141-
offline_store: OfflineStore,
142-
online_store: OnlineStore,
143-
**kwargs,
134+
self,
135+
*,
136+
repo_config: RepoConfig,
137+
offline_store: OfflineStore,
138+
online_store: OnlineStore,
139+
**kwargs,
144140
):
145141
super().__init__(
146142
repo_config=repo_config,
@@ -160,32 +156,16 @@ def __init__(
160156
self.lambda_name = self.lambda_name[:64]
161157
self.lambda_client = boto3.client("lambda")
162158

163-
def materialize(
164-
self,
165-
registry: BaseRegistry,
166-
tasks: List[MaterializationTask]
167-
) -> List[MaterializationJob]:
168-
return [
169-
self._materialize_one(
170-
registry,
171-
task.feature_view,
172-
task.start_time,
173-
task.end_time,
174-
task.project,
175-
task.tqdm_builder,
176-
)
177-
for task in tasks
178-
]
179-
180159
def _materialize_one(
181-
self,
182-
registry: BaseRegistry,
183-
feature_view: Union[BatchFeatureView, StreamFeatureView, FeatureView],
184-
start_date: datetime,
185-
end_date: datetime,
186-
project: str,
187-
tqdm_builder: Callable[[int], tqdm],
160+
self,
161+
registry: BaseRegistry,
162+
task: MaterializationTask,
188163
):
164+
feature_view = task.feature_view
165+
start_date = task.start_time
166+
end_date = task.end_time
167+
project = task.project
168+
189169
entities = []
190170
for entity_name in feature_view.entities:
191171
entities.append(registry.get_entity(entity_name, project))

sdk/python/feast/infra/compute_engines/base.py

Lines changed: 27 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1,25 +1,24 @@
11
from abc import ABC, abstractmethod
2-
from typing import Union
2+
from typing import List, Sequence, Union
33

44
import pyarrow as pa
5-
from typing import Sequence, Union
5+
66
from feast import RepoConfig
7+
from feast.batch_feature_view import BatchFeatureView
8+
from feast.entity import Entity
9+
from feast.feature_view import FeatureView
710
from feast.infra.common.materialization_job import (
811
MaterializationJob,
912
MaterializationTask,
1013
)
11-
from feast.entity import Entity
1214
from feast.infra.common.retrieval_task import HistoricalRetrievalTask
1315
from feast.infra.compute_engines.dag.context import ColumnInfo, ExecutionContext
1416
from feast.infra.offline_stores.offline_store import OfflineStore
1517
from feast.infra.online_stores.online_store import OnlineStore
16-
from feast.infra.registry.registry import Registry
17-
from feast.utils import _get_column_names
18-
from feast.feature_view import FeatureView
18+
from feast.infra.registry.base_registry import BaseRegistry
1919
from feast.on_demand_feature_view import OnDemandFeatureView
2020
from feast.stream_feature_view import StreamFeatureView
21-
from feast.batch_feature_view import BatchFeatureView
22-
from feast.infra.registry.base_registry import BaseRegistry
21+
from feast.utils import _get_column_names
2322

2423

2524
class ComputeEngine(ABC):
@@ -87,14 +86,27 @@ def teardown_infra(
8786
"""
8887
pass
8988

90-
def materialize(self,
91-
registry: BaseRegistry,
92-
task: MaterializationTask) -> MaterializationJob:
93-
raise NotImplementedError
89+
def materialize(
90+
self,
91+
registry: BaseRegistry,
92+
tasks: Union[MaterializationTask, List[MaterializationTask]],
93+
) -> List[MaterializationJob]:
94+
if isinstance(tasks, MaterializationTask):
95+
tasks = [tasks]
96+
return [self._materialize_one(registry, task) for task in tasks]
97+
98+
def _materialize_one(
99+
self,
100+
registry: BaseRegistry,
101+
task: MaterializationTask,
102+
) -> MaterializationJob:
103+
raise NotImplementedError(
104+
"Materialization is not implemented for this compute engine."
105+
)
94106

95-
def get_historical_features(self,
96-
registry: BaseRegistry,
97-
task: HistoricalRetrievalTask) -> pa.Table:
107+
def get_historical_features(
108+
self, registry: BaseRegistry, task: HistoricalRetrievalTask
109+
) -> pa.Table:
98110
raise NotImplementedError
99111

100112
def get_execution_context(

sdk/python/feast/infra/compute_engines/kubernetes/k8s_engine.py

Lines changed: 13 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,7 @@
11
import logging
22
import uuid
3-
from datetime import datetime
43
from time import sleep
5-
from typing import Callable, List, Literal, Union
4+
from typing import List, Literal
65

76
import pyarrow as pa
87
import yaml
@@ -11,25 +10,21 @@
1110
from kubernetes.client.exceptions import ApiException
1211
from kubernetes.utils import FailToCreateError
1312
from pydantic import StrictStr
14-
from tqdm import tqdm
1513

16-
from feast import FeatureView, RepoConfig
17-
from feast.batch_feature_view import BatchFeatureView
14+
from feast import RepoConfig
1815
from feast.infra.common.materialization_job import (
19-
MaterializationJob,
2016
MaterializationJobStatus,
2117
MaterializationTask,
2218
)
19+
from feast.infra.common.retrieval_task import HistoricalRetrievalTask
20+
from feast.infra.compute_engines.base import ComputeEngine
2321
from feast.infra.offline_stores.offline_store import OfflineStore
2422
from feast.infra.online_stores.online_store import OnlineStore
2523
from feast.infra.registry.base_registry import BaseRegistry
2624
from feast.repo_config import FeastConfigBaseModel
27-
from feast.stream_feature_view import StreamFeatureView
2825
from feast.utils import _get_column_names
2926

3027
from .k8s_materialization_job import KubernetesMaterializationJob
31-
from feast.infra.compute_engines.base import ComputeEngine
32-
from feast.infra.common.retrieval_task import HistoricalRetrievalTask
3328

3429
logger = logging.getLogger(__name__)
3530

@@ -93,9 +88,9 @@ class KubernetesComputeEngineConfig(FeastConfigBaseModel):
9388

9489

9590
class KubernetesComputeEngine(ComputeEngine):
96-
def get_historical_features(self,
97-
registry: BaseRegistry,
98-
task: HistoricalRetrievalTask) -> pa.Table:
91+
def get_historical_features(
92+
self, registry: BaseRegistry, task: HistoricalRetrievalTask
93+
) -> pa.Table:
9994
raise NotImplementedError(
10095
"KubernetesComputeEngine does not support get_historical_features()"
10196
)
@@ -126,32 +121,16 @@ def __init__(
126121
self.batch_engine_config = repo_config.batch_engine
127122
self.namespace = self.batch_engine_config.namespace
128123

129-
def materialize(
130-
self,
131-
registry: BaseRegistry,
132-
tasks: List[MaterializationTask],
133-
) -> List[MaterializationJob]:
134-
return [
135-
self._materialize_one(
136-
registry,
137-
task.feature_view,
138-
task.start_time,
139-
task.end_time,
140-
task.project,
141-
task.tqdm_builder,
142-
)
143-
for task in tasks
144-
]
145-
146124
def _materialize_one(
147125
self,
148126
registry: BaseRegistry,
149-
feature_view: Union[BatchFeatureView, StreamFeatureView, FeatureView],
150-
start_date: datetime,
151-
end_date: datetime,
152-
project: str,
153-
tqdm_builder: Callable[[int], tqdm],
127+
task: MaterializationTask,
154128
):
129+
feature_view = task.feature_view
130+
start_date = task.start_time
131+
end_date = task.end_time
132+
project = task.project
133+
155134
entities = []
156135
for entity_name in feature_view.entities:
157136
entities.append(registry.get_entity(entity_name, project))

0 commit comments

Comments
 (0)