-
Notifications
You must be signed in to change notification settings - Fork 108
Expand file tree
/
Copy pathtest_checkpoint_suspend.py
More file actions
242 lines (211 loc) · 7.62 KB
/
test_checkpoint_suspend.py
File metadata and controls
242 lines (211 loc) · 7.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
import time
import json
from http import HTTPStatus
from urllib.parse import quote_plus
import pytest
from tests import TEST_CLIENT, enterprise_only
from tests.platform.test_ingress_formats import create_pipeline
from .helper import (
get,
api_url,
post_no_body,
start_pipeline,
start_pipeline_as_paused,
stop_pipeline,
connector_paused,
connector_action,
wait_for_condition,
gen_pipeline_name,
)
def _adhoc_count(name: str) -> int:
    """Run an ad-hoc ``SELECT COUNT(*)`` against table ``t1`` of pipeline *name*.

    Returns -1 when the query endpoint does not answer 200 OK, 0 when the
    response body is empty, and otherwise the count parsed from the first
    JSON line of the response (missing/null ``c`` also yields 0).
    """
    query = "SELECT COUNT(*) AS c FROM t1"
    url = api_url(f"/pipelines/{name}/query?sql={quote_plus(query)}&format=json")
    response = get(url)
    if response.status_code != HTTPStatus.OK:
        return -1
    body = response.text.strip()
    if not body:
        return 0
    first_line = body.split("\n")[0]
    record = json.loads(first_line)
    return record.get("c") or 0
@gen_pipeline_name
def test_checkpoint_oss(pipeline_name):
    """
    On OSS builds (non-enterprise), the checkpoint endpoint must answer
    NOT_IMPLEMENTED with error_code EnterpriseFeature.

    Skips itself when running against the enterprise edition.
    """
    if TEST_CLIENT.get_config().edition.is_enterprise():
        pytest.skip("Enterprise edition: use enterprise checkpoint test instead")

    create_pipeline(
        pipeline_name,
        "CREATE TABLE t1(x int) WITH ('materialized'='true');",
    )

    response = post_no_body(api_url(f"/pipelines/{pipeline_name}/checkpoint"))
    assert response.status_code == HTTPStatus.NOT_IMPLEMENTED, response.text
    payload = response.json()
    assert payload.get("error_code") == "EnterpriseFeature", payload
@enterprise_only
@gen_pipeline_name
def test_checkpoint_enterprise(pipeline_name):
    """
    Enterprise: invoke /checkpoint several times and poll /checkpoint_status
    until each requested checkpoint sequence number is reported successful.
    """
    create_pipeline(
        pipeline_name,
        "CREATE TABLE t1(x int) WITH ('materialized'='true');",
    )
    start_pipeline_as_paused(pipeline_name)

    for _attempt in range(5):
        response = post_no_body(api_url(f"/pipelines/{pipeline_name}/checkpoint"))
        assert response.status_code == HTTPStatus.OK, (
            f"Checkpoint POST failed: {response.status_code} {response.text}"
        )
        seq = response.json().get("checkpoint_sequence_number")
        assert isinstance(seq, int), (
            f"Missing checkpoint_sequence_number in {response.text}"
        )

        # Bind the current sequence number as a default argument so the
        # poller is self-contained for this iteration.
        def checkpoint_succeeded(expected=seq):
            status = get(api_url(f"/pipelines/{pipeline_name}/checkpoint_status"))
            return (
                status.status_code == HTTPStatus.OK
                and status.json().get("success") == expected
            )

        wait_for_condition(
            f"checkpoint seq={seq} reaches success",
            checkpoint_succeeded,
            timeout_s=10.0,
            poll_interval_s=0.2,
        )
@gen_pipeline_name
def test_suspend_oss(pipeline_name):
    """
    On OSS builds, a non-force stop (suspend) must answer NOT_IMPLEMENTED
    with error_code EnterpriseFeature.

    Skips itself when running against the enterprise edition.
    """
    if TEST_CLIENT.get_config().edition.is_enterprise():
        pytest.skip("Enterprise edition: use enterprise suspend test instead")

    create_pipeline(
        pipeline_name,
        "CREATE TABLE t1(x int) WITH ('materialized'='true');",
    )

    response = post_no_body(api_url(f"/pipelines/{pipeline_name}/stop?force=false"))
    assert response.status_code == HTTPStatus.NOT_IMPLEMENTED, (
        response.status_code,
        response.text,
    )
    payload = response.json()
    assert payload.get("error_code") == "EnterpriseFeature", payload
@enterprise_only
@gen_pipeline_name
def test_suspend_enterprise(pipeline_name):
    """
    Enterprise suspend/resume sequence with connector dependencies:
    1. All three connectors (c1, c2[label1], c3[start_after label1]) start paused.
    2. Start pipeline -> connectors remain paused.
    3. Start c1 -> expect 1 record after completion.
    4. Suspend pipeline (stop without force) and resume -> c1 should remain
       running (EOI), c2,c3 paused.
    5. Start c2 -> c2 runs, triggers start_after dependency for c3 -> data
       from both.
    6. Suspend/resume again -> all connectors in EOI, verify no new data arrives.
    """
    sql = r"""
    CREATE TABLE t1 (
        x int
    ) WITH (
        'materialized' = 'true',
        'connectors' = '[{
            "name": "c1",
            "paused": true,
            "transport": {
                "name": "datagen",
                "config": {
                    "plan": [{
                        "limit": 1,
                        "fields": { "x": { "values": [1] } }
                    }]
                }
            }
        },
        {
            "name": "c2",
            "paused": true,
            "labels": ["label1"],
            "transport": {
                "name": "datagen",
                "config": {
                    "plan": [{
                        "limit": 3,
                        "fields": { "x": { "values": [2,3,4] } }
                    }]
                }
            }
        },
        {
            "name": "c3",
            "paused": true,
            "start_after": ["label1"],
            "transport": {
                "name": "datagen",
                "config": {
                    "plan": [{
                        "limit": 5,
                        "fields": { "x": { "values": [5,6,7,8,9] } }
                    }]
                }
            }
        }]'
    );
    """.strip()
    create_pipeline(pipeline_name, sql)

    # Start pipeline; every connector is declared paused so none should run.
    start_pipeline(pipeline_name)
    for connector in ("c1", "c2", "c3"):
        assert connector_paused(pipeline_name, "t1", connector)

    # Kick off c1 and wait for its single record to land.
    connector_action(pipeline_name, "t1", "c1", "start")
    wait_for_condition(
        "1 record from c1",
        lambda: _adhoc_count(pipeline_name) == 1,
        timeout_s=10.0,
        poll_interval_s=1.0,
    )

    # Suspend (non-force stop) then resume the pipeline.
    stop_pipeline(pipeline_name, force=False)
    start_pipeline(pipeline_name)

    # After resume: c1 running (at end-of-input), c2 and c3 still paused.
    assert not connector_paused(pipeline_name, "t1", "c1")
    assert connector_paused(pipeline_name, "t1", "c2")
    assert connector_paused(pipeline_name, "t1", "c3")

    # Starting c2 should, via the start_after label, let c3 run once c2
    # finishes -> 1 + 3 + 5 = 9 records in total.
    connector_action(pipeline_name, "t1", "c2", "start")
    wait_for_condition(
        "9 total records after c2/c3",
        lambda: _adhoc_count(pipeline_name) == 9,
        timeout_s=15.0,
        poll_interval_s=1.0,
    )

    # Suspend/resume one more time.
    stop_pipeline(pipeline_name, force=False)
    start_pipeline(pipeline_name)

    # All connectors should now be running (end-of-input reached).
    for connector in ("c1", "c2", "c3"):
        assert not connector_paused(pipeline_name, "t1", connector)

    # Verify the record count stays flat for a continuous 5-second window.
    final_count = _adhoc_count(pipeline_name)
    stable_since = None

    def no_new_records_for_5s():
        nonlocal stable_since
        now = time.monotonic()
        if _adhoc_count(pipeline_name) != final_count:
            # Count moved: restart the stability window.
            stable_since = None
            return False
        if stable_since is None:
            stable_since = now
        return now - stable_since >= 5.0

    wait_for_condition(
        "no new records for 5s after all connectors reached EOI",
        no_new_records_for_5s,
        timeout_s=30.0,
        poll_interval_s=1.0,
    )
    assert _adhoc_count(pipeline_name) == final_count, (
        "Received new records after all connectors reached EOI"
    )