Skip to content

Commit 3ef331d

Browse files
authored
Refactor providers to remove duplicate implementations (#1876)
* Refactor providers to remove duplicate implementations
  Signed-off-by: Achal Shah <achals@gmail.com>
* Refactor
  Signed-off-by: Achal Shah <achals@gmail.com>
* refactor
  Signed-off-by: Achal Shah <achals@gmail.com>
* refactor
  Signed-off-by: Achal Shah <achals@gmail.com>
* fix imports
  Signed-off-by: Achal Shah <achals@gmail.com>
* Dynamic import for passthru
  Signed-off-by: Achal Shah <achals@gmail.com>
* Dynamic import for passthru
  Signed-off-by: Achal Shah <achals@gmail.com>
* remove init files
  Signed-off-by: Achal Shah <achals@gmail.com>
1 parent 8a49a65 commit 3ef331d

5 files changed

Lines changed: 174 additions & 423 deletions

File tree

sdk/python/feast/infra/aws.py

Lines changed: 7 additions & 138 deletions
Original file line number | Diff line number | Diff line change
@@ -3,152 +3,21 @@
33
from datetime import datetime
44
from pathlib import Path
55
from tempfile import TemporaryFile
6-
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
76
from urllib.parse import urlparse
87

9-
import pandas
10-
from tqdm import tqdm
11-
12-
from feast import FeatureTable
13-
from feast.entity import Entity
148
from feast.errors import S3RegistryBucketForbiddenAccess, S3RegistryBucketNotExist
15-
from feast.feature_view import FeatureView
16-
from feast.infra.offline_stores.offline_utils import get_offline_store_from_config
17-
from feast.infra.online_stores.helpers import get_online_store_from_config
18-
from feast.infra.provider import (
19-
Provider,
20-
RetrievalJob,
21-
_convert_arrow_to_proto,
22-
_get_column_names,
23-
_run_field_mapping,
24-
)
9+
from feast.infra.passthrough_provider import PassthroughProvider
2510
from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto
26-
from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto
27-
from feast.protos.feast.types.Value_pb2 import Value as ValueProto
28-
from feast.registry import Registry
2911
from feast.registry_store import RegistryStore
30-
from feast.repo_config import RegistryConfig, RepoConfig
31-
32-
33-
class AwsProvider(Provider):
34-
def __init__(self, config: RepoConfig):
35-
self.repo_config = config
36-
self.offline_store = get_offline_store_from_config(config.offline_store)
37-
self.online_store = get_online_store_from_config(config.online_store)
38-
39-
def update_infra(
40-
self,
41-
project: str,
42-
tables_to_delete: Sequence[Union[FeatureTable, FeatureView]],
43-
tables_to_keep: Sequence[Union[FeatureTable, FeatureView]],
44-
entities_to_delete: Sequence[Entity],
45-
entities_to_keep: Sequence[Entity],
46-
partial: bool,
47-
):
48-
self.online_store.update(
49-
config=self.repo_config,
50-
tables_to_delete=tables_to_delete,
51-
tables_to_keep=tables_to_keep,
52-
entities_to_keep=entities_to_keep,
53-
entities_to_delete=entities_to_delete,
54-
partial=partial,
55-
)
56-
57-
def teardown_infra(
58-
self,
59-
project: str,
60-
tables: Sequence[Union[FeatureTable, FeatureView]],
61-
entities: Sequence[Entity],
62-
) -> None:
63-
self.online_store.teardown(self.repo_config, tables, entities)
64-
65-
def online_write_batch(
66-
self,
67-
config: RepoConfig,
68-
table: Union[FeatureTable, FeatureView],
69-
data: List[
70-
Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]
71-
],
72-
progress: Optional[Callable[[int], Any]],
73-
) -> None:
74-
self.online_store.online_write_batch(config, table, data, progress)
75-
76-
def online_read(
77-
self,
78-
config: RepoConfig,
79-
table: Union[FeatureTable, FeatureView],
80-
entity_keys: List[EntityKeyProto],
81-
requested_features: List[str] = None,
82-
) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
83-
result = self.online_store.online_read(config, table, entity_keys)
12+
from feast.repo_config import RegistryConfig
8413

85-
return result
8614

87-
def materialize_single_feature_view(
88-
self,
89-
config: RepoConfig,
90-
feature_view: FeatureView,
91-
start_date: datetime,
92-
end_date: datetime,
93-
registry: Registry,
94-
project: str,
95-
tqdm_builder: Callable[[int], tqdm],
96-
) -> None:
97-
entities = []
98-
for entity_name in feature_view.entities:
99-
entities.append(registry.get_entity(entity_name, project))
15+
class AwsProvider(PassthroughProvider):
16+
"""
17+
This class only exists for backwards compatibility.
18+
"""
10019

101-
(
102-
join_key_columns,
103-
feature_name_columns,
104-
event_timestamp_column,
105-
created_timestamp_column,
106-
) = _get_column_names(feature_view, entities)
107-
108-
offline_job = self.offline_store.pull_latest_from_table_or_query(
109-
config=config,
110-
data_source=feature_view.batch_source,
111-
join_key_columns=join_key_columns,
112-
feature_name_columns=feature_name_columns,
113-
event_timestamp_column=event_timestamp_column,
114-
created_timestamp_column=created_timestamp_column,
115-
start_date=start_date,
116-
end_date=end_date,
117-
)
118-
119-
table = offline_job.to_arrow()
120-
121-
if feature_view.batch_source.field_mapping is not None:
122-
table = _run_field_mapping(table, feature_view.batch_source.field_mapping)
123-
124-
join_keys = [entity.join_key for entity in entities]
125-
rows_to_write = _convert_arrow_to_proto(table, feature_view, join_keys)
126-
127-
with tqdm_builder(len(rows_to_write)) as pbar:
128-
self.online_write_batch(
129-
self.repo_config, feature_view, rows_to_write, lambda x: pbar.update(x)
130-
)
131-
132-
def get_historical_features(
133-
self,
134-
config: RepoConfig,
135-
feature_views: List[FeatureView],
136-
feature_refs: List[str],
137-
entity_df: Union[pandas.DataFrame, str],
138-
registry: Registry,
139-
project: str,
140-
full_feature_names: bool,
141-
) -> RetrievalJob:
142-
job = self.offline_store.get_historical_features(
143-
config=config,
144-
feature_views=feature_views,
145-
feature_refs=feature_refs,
146-
entity_df=entity_df,
147-
registry=registry,
148-
project=project,
149-
full_feature_names=full_feature_names,
150-
)
151-
return job
20+
pass
15221

15322

15423
class S3RegistryStore(RegistryStore):

sdk/python/feast/infra/gcp.py

Lines changed: 7 additions & 140 deletions
Original file line numberDiff line numberDiff line change
@@ -2,153 +2,20 @@
22
from datetime import datetime
33
from pathlib import Path
44
from tempfile import TemporaryFile
5-
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
65
from urllib.parse import urlparse
76

8-
import pandas
9-
from tqdm import tqdm
10-
11-
from feast import FeatureTable
12-
from feast.entity import Entity
13-
from feast.feature_view import FeatureView
14-
from feast.infra.offline_stores.offline_utils import get_offline_store_from_config
15-
from feast.infra.online_stores.helpers import get_online_store_from_config
16-
from feast.infra.provider import (
17-
Provider,
18-
RetrievalJob,
19-
_convert_arrow_to_proto,
20-
_get_column_names,
21-
_run_field_mapping,
22-
)
7+
from feast.infra.passthrough_provider import PassthroughProvider
238
from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto
24-
from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto
25-
from feast.protos.feast.types.Value_pb2 import Value as ValueProto
26-
from feast.registry import Registry
279
from feast.registry_store import RegistryStore
28-
from feast.repo_config import RegistryConfig, RepoConfig
29-
30-
31-
class GcpProvider(Provider):
32-
_gcp_project_id: Optional[str]
33-
_namespace: Optional[str]
34-
35-
def __init__(self, config: RepoConfig):
36-
self.repo_config = config
37-
self.offline_store = get_offline_store_from_config(config.offline_store)
38-
self.online_store = get_online_store_from_config(config.online_store)
39-
40-
def update_infra(
41-
self,
42-
project: str,
43-
tables_to_delete: Sequence[Union[FeatureTable, FeatureView]],
44-
tables_to_keep: Sequence[Union[FeatureTable, FeatureView]],
45-
entities_to_delete: Sequence[Entity],
46-
entities_to_keep: Sequence[Entity],
47-
partial: bool,
48-
):
49-
self.online_store.update(
50-
config=self.repo_config,
51-
tables_to_delete=tables_to_delete,
52-
tables_to_keep=tables_to_keep,
53-
entities_to_keep=entities_to_keep,
54-
entities_to_delete=entities_to_delete,
55-
partial=partial,
56-
)
57-
58-
def teardown_infra(
59-
self,
60-
project: str,
61-
tables: Sequence[Union[FeatureTable, FeatureView]],
62-
entities: Sequence[Entity],
63-
) -> None:
64-
self.online_store.teardown(self.repo_config, tables, entities)
65-
66-
def online_write_batch(
67-
self,
68-
config: RepoConfig,
69-
table: Union[FeatureTable, FeatureView],
70-
data: List[
71-
Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]
72-
],
73-
progress: Optional[Callable[[int], Any]],
74-
) -> None:
75-
self.online_store.online_write_batch(config, table, data, progress)
76-
77-
def online_read(
78-
self,
79-
config: RepoConfig,
80-
table: Union[FeatureTable, FeatureView],
81-
entity_keys: List[EntityKeyProto],
82-
requested_features: List[str] = None,
83-
) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
84-
result = self.online_store.online_read(config, table, entity_keys)
10+
from feast.repo_config import RegistryConfig
8511

86-
return result
8712

88-
def materialize_single_feature_view(
89-
self,
90-
config: RepoConfig,
91-
feature_view: FeatureView,
92-
start_date: datetime,
93-
end_date: datetime,
94-
registry: Registry,
95-
project: str,
96-
tqdm_builder: Callable[[int], tqdm],
97-
) -> None:
98-
entities = []
99-
for entity_name in feature_view.entities:
100-
entities.append(registry.get_entity(entity_name, project))
13+
class GcpProvider(PassthroughProvider):
14+
"""
15+
This class only exists for backwards compatibility.
16+
"""
10117

102-
(
103-
join_key_columns,
104-
feature_name_columns,
105-
event_timestamp_column,
106-
created_timestamp_column,
107-
) = _get_column_names(feature_view, entities)
108-
109-
offline_job = self.offline_store.pull_latest_from_table_or_query(
110-
config=config,
111-
data_source=feature_view.batch_source,
112-
join_key_columns=join_key_columns,
113-
feature_name_columns=feature_name_columns,
114-
event_timestamp_column=event_timestamp_column,
115-
created_timestamp_column=created_timestamp_column,
116-
start_date=start_date,
117-
end_date=end_date,
118-
)
119-
table = offline_job.to_arrow()
120-
121-
if feature_view.batch_source.field_mapping is not None:
122-
table = _run_field_mapping(table, feature_view.batch_source.field_mapping)
123-
124-
join_keys = [entity.join_key for entity in entities]
125-
rows_to_write = _convert_arrow_to_proto(table, feature_view, join_keys)
126-
127-
with tqdm_builder(len(rows_to_write)) as pbar:
128-
self.online_write_batch(
129-
self.repo_config, feature_view, rows_to_write, lambda x: pbar.update(x)
130-
)
131-
132-
def get_historical_features(
133-
self,
134-
config: RepoConfig,
135-
feature_views: List[FeatureView],
136-
feature_refs: List[str],
137-
entity_df: Union[pandas.DataFrame, str],
138-
registry: Registry,
139-
project: str,
140-
full_feature_names: bool,
141-
) -> RetrievalJob:
142-
job = self.offline_store.get_historical_features(
143-
config=config,
144-
feature_views=feature_views,
145-
feature_refs=feature_refs,
146-
entity_df=entity_df,
147-
registry=registry,
148-
project=project,
149-
full_feature_names=full_feature_names,
150-
)
151-
return job
18+
pass
15219

15320

15421
class GCSRegistryStore(RegistryStore):

0 commit comments

Comments
 (0)