-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Expand file tree
/
Copy pathutils.py
More file actions
93 lines (75 loc) · 2.76 KB
/
utils.py
File metadata and controls
93 lines (75 loc) · 2.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
"""
Utility functions for Ray compute engine.
"""
import functools
import logging
from typing import Callable, Dict, Union

import pandas as pd
import pyarrow as pa

from feast.batch_feature_view import BatchFeatureView
from feast.feature_view import FeatureView
from feast.infra.online_stores.online_store import OnlineStore
from feast.repo_config import RepoConfig
from feast.stream_feature_view import StreamFeatureView
from feast.utils import _convert_arrow_to_proto
from feast.value_type import ValueType
logger = logging.getLogger(__name__)
def write_to_online_store(
    arrow_table: pa.Table,
    feature_view: Union[BatchFeatureView, StreamFeatureView, FeatureView],
    online_store: OnlineStore,
    repo_config: RepoConfig,
) -> None:
    """
    Write the contents of an Arrow table to the online store.

    Best-effort: any failure is logged (with traceback) rather than raised,
    so one bad batch does not abort the surrounding materialization job.

    Args:
        arrow_table: Arrow table containing the rows to write.
        feature_view: Feature view being materialized.
        online_store: Online store instance to write through.
        repo_config: Repository configuration forwarded to the store.
    """
    # Skip feature views that are not configured for online serving.
    if not getattr(feature_view, "online", False):
        return
    try:
        # Map each entity join key to its value type; _convert_arrow_to_proto
        # needs this mapping to encode entity keys.
        join_key_to_value_type: Dict[str, ValueType] = {}
        if getattr(feature_view, "entity_columns", None):
            join_key_to_value_type = {
                entity.name: entity.dtype.to_value_type()
                for entity in feature_view.entity_columns
            }
        rows_to_write = _convert_arrow_to_proto(
            arrow_table, feature_view, join_key_to_value_type
        )
        if not rows_to_write:
            logger.warning("No rows to write for %s", feature_view.name)
            return
        online_store.online_write_batch(
            config=repo_config,
            table=feature_view,
            data=rows_to_write,
            progress=lambda x: None,  # no per-row progress reporting here
        )
        logger.debug(
            "Successfully wrote %d rows to online store for %s",
            len(rows_to_write),
            feature_view.name,
        )
    except Exception:
        # Deliberate best-effort: record the full traceback but do not
        # re-raise (logger.exception, unlike logger.error, keeps the trace).
        logger.exception(
            "Failed to write to online store for %s", feature_view.name
        )
def safe_batch_processor(
    func: Callable[[pd.DataFrame], pd.DataFrame],
) -> Callable[[pd.DataFrame], pd.DataFrame]:
    """
    Wrap a batch-processing function to handle empty batches and errors gracefully.

    Empty batches are returned unchanged without invoking ``func``.  If
    ``func`` raises, the failure is logged with its traceback and the
    original batch is returned unmodified — a deliberate best-effort policy
    so one bad batch cannot abort the whole pipeline.

    Args:
        func: Function that transforms a pandas DataFrame batch.

    Returns:
        Wrapped function with the same call signature as ``func``.
    """

    # wraps() preserves __name__/__doc__ on the wrapper; without it the
    # log message below would always report "wrapper" as the failing func.
    @functools.wraps(func)
    def wrapper(batch: pd.DataFrame) -> pd.DataFrame:
        # Nothing to process in an empty batch; skip func entirely.
        if batch.empty:
            return batch
        try:
            return func(batch)
        except Exception:
            # Log with traceback, then fall back to the untransformed batch.
            logger.exception("Batch processing failed in %s", func.__name__)
            return batch

    return wrapper