Commit 92cfb48

hkuepers committed
Update lambda materialization engine

Signed-off-by: hkuepers <hanno.kuepers@ratepay.com>
1 parent ef3dc05

2 files changed: +70 -57 lines changed

Lines changed: 65 additions & 55 deletions
@@ -1,84 +1,94 @@
+"""Based on from feast.infra.materialization.aws_lambda."""
+
 import base64
-import json
-import sys
+import logging
 import tempfile
-import traceback
 from pathlib import Path
 
 import pyarrow.parquet as pq
 
 from feast import FeatureStore
 from feast.constants import FEATURE_STORE_YAML_ENV_NAME
-from feast.infra.materialization.local_engine import DEFAULT_BATCH_SIZE
+from feast.infra.compute_engines.aws_lambda.lambda_engine import DEFAULT_BATCH_SIZE
 from feast.utils import _convert_arrow_to_proto, _run_pyarrow_field_mapping
 
+logger = logging.getLogger()
+logger.setLevel("INFO")
 
 def handler(event, context):
-    """Provide an event that contains the following keys:
-
-    - operation: one of the operations in the operations dict below
-    - tableName: required for operations that interact with DynamoDB
-    - payload: a parameter to pass to the operation being performed
+    """Load a parquet file and write the feature values to the online store.
+
+    Args:
+        event (dict): payload containing the following keys:
+            FEATURE_STORE_YAML_ENV_NAME: Base64 encoded feature store config
+            view_name: Name of FeatureView to be materialized
+            view_type: Type of FeatureView
+            path: Path to parquet batch file on S3 bucket
+        context (dict): Lambda runtime context, not used.
     """
-    print("Received event: " + json.dumps(event, indent=2), flush=True)
+    logger.info(f"Received event: {event}")
 
     try:
         config_base64 = event[FEATURE_STORE_YAML_ENV_NAME]
 
         config_bytes = base64.b64decode(config_base64)
 
         # Create a new unique directory for writing feature_store.yaml
-        repo_path = Path(tempfile.mkdtemp())
-
-        with open(repo_path / "feature_store.yaml", "wb") as f:
-            f.write(config_bytes)
+        with tempfile.TemporaryDirectory() as repo_posix_path:
+            repo_path = Path(repo_posix_path)
 
-        # Initialize the feature store
-        store = FeatureStore(repo_path=str(repo_path.resolve()))
+            with open(repo_path / "feature_store.yaml", "wb") as f:
+                f.write(config_bytes)
 
-        view_name = event["view_name"]
-        view_type = event["view_type"]
-        path = event["path"]
+            # Initialize the feature store
+            store = FeatureStore(repo_path=str(repo_path.resolve()))
 
-        bucket = path[len("s3://") :].split("/", 1)[0]
-        key = path[len("s3://") :].split("/", 1)[1]
-        print(f"Inferred Bucket: `{bucket}` Key: `{key}`", flush=True)
+            view_name = event["view_name"]
+            view_type = event["view_type"]
+            path = event["path"]
 
-        if view_type == "batch":
-            # TODO: This probably needs to be become `store.get_batch_feature_view` at some point.
-            feature_view = store.get_feature_view(view_name)
-        else:
-            feature_view = store.get_stream_feature_view(view_name)
+            bucket, key = path[len("s3://") :].split("/", 1)
+            logger.info(f"Inferred Bucket: `{bucket}` Key: `{key}`")
 
-        print(f"Got Feature View: `{feature_view}`", flush=True)
+            if view_type == "batch":
+                # TODO: This probably needs to be become `store.get_batch_feature_view` at some point.  # noqa: E501,W505
+                feature_view = store.get_feature_view(view_name)
+            else:
+                feature_view = store.get_stream_feature_view(view_name)
 
-        table = pq.read_table(path)
-        if feature_view.batch_source.field_mapping is not None:
-            table = _run_pyarrow_field_mapping(
-                table, feature_view.batch_source.field_mapping
+            logger.info(
+                f"Got Feature View: `{feature_view.name}`, \
+                last updated: {feature_view.last_updated_timestamp}"
             )
 
-        join_key_to_value_type = {
-            entity.name: entity.dtype.to_value_type()
-            for entity in feature_view.entity_columns
-        }
-
-        written_rows = 0
-
-        for batch in table.to_batches(DEFAULT_BATCH_SIZE):
-            rows_to_write = _convert_arrow_to_proto(
-                batch, feature_view, join_key_to_value_type
-            )
-            store._provider.online_write_batch(
-                store.config,
-                feature_view,
-                rows_to_write,
-                lambda x: None,
+            table = pq.read_table(path)
+            if feature_view.batch_source.field_mapping is not None:
+                table = _run_pyarrow_field_mapping(
+                    table, feature_view.batch_source.field_mapping
+                )
+
+            join_key_to_value_type = {
+                entity.name: entity.dtype.to_value_type()
+                for entity in feature_view.entity_columns
+            }
+
+            written_rows = 0
+
+            for batch in table.to_batches(DEFAULT_BATCH_SIZE):
+                rows_to_write = _convert_arrow_to_proto(
+                    batch, feature_view, join_key_to_value_type
+                )
+                store._provider.online_write_batch(
+                    store.config,
+                    feature_view,
+                    rows_to_write,
+                    lambda x: None,
+                )
+                written_rows += len(rows_to_write)
+            logger.info(
+                f"Successfully updated {written_rows} rows.",
+                extra={"num_updated_rows": written_rows, "feature_view": view_name},
             )
-            written_rows += len(rows_to_write)
-        return {"written_rows": written_rows}
-    except Exception as e:
-        print(f"Exception: {e}", flush=True)
-        print("Traceback:", flush=True)
-        print(traceback.format_exc(), flush=True)
-        sys.exit(1)
+    except Exception:
+        logger.exception("Error in processing materialization.")
+        raise
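
For reference, a minimal sketch of invoking the updated handler locally with an event shaped the way the new docstring describes. The bucket, key, and view names are hypothetical, and the import of `handler` itself is omitted because the module's path is not shown on this page:

import base64

from feast.constants import FEATURE_STORE_YAML_ENV_NAME

# The handler expects the contents of feature_store.yaml base64-encoded
# under the FEATURE_STORE_YAML_ENV_NAME key.
with open("feature_store.yaml", "rb") as f:
    config_b64 = base64.b64encode(f.read()).decode("utf-8")

event = {
    FEATURE_STORE_YAML_ENV_NAME: config_b64,
    "view_name": "driver_hourly_stats",  # hypothetical FeatureView name
    "view_type": "batch",
    "path": "s3://my-bucket/materialization/batch-0.parquet",  # hypothetical
}

# The handler now logs the written-row count instead of returning it,
# so the call returns None.
handler(event, context=None)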

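Two of the smaller refactors in this file can be sanity-checked in isolation; a standalone sketch with a made-up path:

import tempfile
from pathlib import Path

# The single tuple-unpacking split yields the same bucket/key pair as
# the old pair of redundant split calls.
path = "s3://my-bucket/materialization/batch-0.parquet"
bucket, key = path[len("s3://") :].split("/", 1)
assert (bucket, key) == ("my-bucket", "materialization/batch-0.parquet")

# Unlike tempfile.mkdtemp(), TemporaryDirectory removes the directory on
# exit, including on exceptions, so warm Lambda containers no longer
# accumulate stale repo directories under /tmp.
with tempfile.TemporaryDirectory() as repo_posix_path:
    repo_path = Path(repo_posix_path)
    (repo_path / "feature_store.yaml").write_bytes(b"project: demo\n")
assert not repo_path.exists()
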
sdk/python/feast/infra/compute_engines/aws_lambda/lambda_engine.py

Lines changed: 5 additions & 2 deletions
@@ -108,9 +108,12 @@ def update(
         r = self.lambda_client.create_function(
             FunctionName=self.lambda_name,
             PackageType="Image",
-            Role=self.repo_config.batch_engine.lambda_role,
-            Code={"ImageUri": self.repo_config.batch_engine.materialization_image},
+            Role=self.repo_config.batch_engine_config.lambda_role,
+            Code={"ImageUri": self.repo_config.batch_engine_config.materialization_image},
             Timeout=DEFAULT_TIMEOUT,
+            LoggingConfig={
+                "LogFormat": "JSON",
+            },
             Tags={
                 "feast-owned": "True",
                 "project": project,

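The error path changed in the same spirit: `sys.exit(1)` is replaced by `logger.exception` plus a bare `raise`, so the runtime can report the invocation as failed with a structured traceback instead of the worker process simply exiting. A minimal sketch of the pattern, with a `ValueError` standing in for any materialization failure:

import logging

logger = logging.getLogger()
logger.setLevel("INFO")


def handler(event, context):
    try:
        raise ValueError("boom")  # stand-in for the materialization work
    except Exception:
        # Log the traceback (emitted as structured fields under the JSON
        # log format), then re-raise so Lambda marks the invocation as
        # failed and surfaces the error to callers and retry handling;
        # sys.exit(1) would instead terminate the runtime process itself.
        logger.exception("Error in processing materialization.")
        raise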