-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Expand file tree
/
Copy pathray_shared_utils.py
More file actions
175 lines (144 loc) · 5.14 KB
/
ray_shared_utils.py
File metadata and controls
175 lines (144 loc) · 5.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
"""Shared fixtures and utilities for Ray compute engine tests."""
import os
import tempfile
import time
import uuid
from datetime import timedelta
from typing import Generator
import pandas as pd
import pytest
from feast import Entity, FileSource
from feast.data_source import DataSource
from feast.infra.ray_initializer import shutdown_ray
from feast.utils import _utc_now
from tests.universal.feature_repos.repo_configuration import (
construct_test_environment,
)
from .repo_configuration import get_ray_compute_engine_test_config
# Reference timestamps computed once at import time and reused by the helpers
# below. `now` is the `_utc_now()` value truncated to the top of the hour.
now = _utc_now().replace(minute=0, second=0, microsecond=0)
# `today` is midnight of that same day; `now` already has zeroed minute,
# second and microsecond fields, so only the hour has to be reset.
today = now.replace(hour=0)
def get_test_date_range(days_back: int = 7) -> tuple:
    """Return a ``(start_date, end_date)`` pair for use in tests.

    Args:
        days_back: Number of days between the start and the end of the range.

    Returns:
        A 2-tuple whose second item is the module-level ``now`` and whose
        first item lies ``days_back`` days before it.
    """
    window = timedelta(days=days_back)
    return now - window, now
# Module-level entity named "driver_id".
driver = Entity(name="driver_id", description="driver id")
def create_feature_dataset(ray_environment) -> DataSource:
    """Build a four-row driver dataset and register it as a data source.

    The rows cover drivers 1001 and 1002 with event timestamps one, two and
    seven days before the module-level ``today``; ``created`` values are two
    or three hours before the module-level ``now``.

    Args:
        ray_environment: Test environment exposing ``data_source_creator``
            and ``feature_store.project``.

    Returns:
        Whatever ``data_source_creator.create_data_source`` returns for the
        frame, with ``event_timestamp`` as the timestamp field and
        ``created`` as the created-timestamp column.
    """
    one_day = timedelta(days=1)
    yesterday = today - one_day
    last_week = today - timedelta(days=7)
    two_hours_ago = now - timedelta(hours=2)

    column_names = (
        "driver_id",
        "event_timestamp",
        "created",
        "conv_rate",
        "acc_rate",
        "avg_daily_trips",
    )
    records = [
        (1001, yesterday, two_hours_ago, 0.8, 0.5, 15),
        (1001, last_week, now - timedelta(hours=3), 0.75, 0.9, 14),
        (1002, yesterday, two_hours_ago, 0.7, 0.4, 12),
        (1002, yesterday - one_day, two_hours_ago, 0.3, 0.6, 12),
    ]
    # Rebuild one dict per row so the frame is constructed from records
    # keyed by column name, in the column order listed above.
    df = pd.DataFrame([dict(zip(column_names, record)) for record in records])

    return ray_environment.data_source_creator.create_data_source(
        df,
        ray_environment.feature_store.project,
        timestamp_field="event_timestamp",
        created_timestamp_column="created",
    )
def create_entity_df() -> pd.DataFrame:
    """Return an entity dataframe with one row per test driver.

    Drivers 1001 and 1002 each get an ``event_timestamp`` equal to the
    module-level ``today``.
    """
    driver_ids = (1001, 1002)
    return pd.DataFrame(
        [
            {"driver_id": driver_id, "event_timestamp": today}
            for driver_id in driver_ids
        ]
    )
def create_unique_sink_source(temp_dir: str, base_name: str) -> FileSource:
    """Create a fresh sink directory and return a ``FileSource`` pointing at it.

    The directory name combines ``base_name`` with a millisecond timestamp,
    the current process id and a short random token so that concurrently
    running tests do not collide on the same path.

    Args:
        temp_dir: Parent directory in which the sink directory is created.
        base_name: Prefix for the directory name and for the source name.

    Returns:
        A ``FileSource`` named ``"<base_name>_sink_source"`` whose path is
        the newly created directory.
    """
    stamp_ms = int(time.time() * 1000)
    pid = os.getpid()
    # First eight hex digits of a random UUID.
    token = uuid.uuid4().hex[:8]

    # A directory (not a file) path is used - Ray needs directory paths for
    # materialization and will create files inside it.
    sink_dir = os.path.join(temp_dir, f"{base_name}_{stamp_ms}_{pid}_{token}")
    os.makedirs(sink_dir, exist_ok=True)

    return FileSource(
        name=f"{base_name}_sink_source",
        path=sink_dir,
        timestamp_field="event_timestamp",
        created_timestamp_column="created",
    )
def cleanup_ray_environment(ray_environment):
    """Tear down the test environment, then shut Ray down, best effort.

    Each step is attempted independently. An exception from either one is
    reported with a printed warning and is not re-raised.

    Args:
        ray_environment: Environment object exposing ``teardown()``.
    """
    try:
        ray_environment.teardown()
    except Exception as teardown_error:
        print(f"Warning: Ray environment teardown failed: {teardown_error}")

    # Shut Ray down even if the teardown above failed.
    try:
        shutdown_ray()
        # Short pause after the shutdown call before returning to the caller.
        time.sleep(0.2)
    except Exception as shutdown_error:
        print(f"Warning: Ray shutdown failed: {shutdown_error}")
def create_ray_environment():
    """Construct and set up a test environment for the Ray compute engine.

    The environment is built by ``construct_test_environment`` from the
    config returned by ``get_ray_compute_engine_test_config``, with
    ``entity_key_serialization_version=3``, and ``setup()`` is called on it
    before it is returned.
    """
    environment = construct_test_environment(
        get_ray_compute_engine_test_config(),
        None,
        entity_key_serialization_version=3,
    )
    environment.setup()
    return environment
@pytest.fixture(scope="function")
def ray_environment() -> Generator:
    """Provide a freshly created Ray environment and clean it up afterwards.

    Before the environment is created, Ray is shut down so the test does not
    start on top of an instance that may still be running. A failure of that
    pre-test shutdown is not fatal, but it is reported with a printed warning
    (same style as ``cleanup_ray_environment``) instead of being silently
    discarded.

    Yields:
        The environment returned by ``create_ray_environment()``.
    """
    try:
        shutdown_ray()
        time.sleep(0.2)  # Brief pause after the shutdown call
    except Exception as e:
        # Best effort only: keep going, but leave a trace for debugging.
        print(f"Warning: Ray pre-test shutdown failed: {e}")

    environment = create_ray_environment()
    yield environment
    # Runs as fixture teardown once the test has finished.
    cleanup_ray_environment(environment)
@pytest.fixture
def feature_dataset(ray_environment) -> DataSource:
    """Provide the data source built by ``create_feature_dataset``.

    The ``ray_environment`` fixture value is passed through unchanged.
    """
    dataset = create_feature_dataset(ray_environment)
    return dataset
@pytest.fixture
def entity_df() -> pd.DataFrame:
    """Provide the entity dataframe built by ``create_entity_df``."""
    frame = create_entity_df()
    return frame
@pytest.fixture
def temp_dir() -> Generator[str, None, None]:
    """Yield the path of a temporary directory for test artifacts.

    The directory comes from ``tempfile.TemporaryDirectory`` and is cleaned
    up when the context manager exits after the test.
    """
    with tempfile.TemporaryDirectory() as scratch_path:
        yield scratch_path