-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathtest_reader.py
More file actions
440 lines (354 loc) · 15.4 KB
/
test_reader.py
File metadata and controls
440 lines (354 loc) · 15.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
"""
Test cases for trace readers in libCacheSim Python bindings.
This module tests both TraceReader and SyntheticReader functionality.
"""
import pytest
import tempfile
import os
from libcachesim import TraceReader, SyntheticReader
from libcachesim.libcachesim_python import TraceType, SamplerType, Request, ReaderInitParam, Sampler
class TestSyntheticReader:
"""Test SyntheticReader functionality"""
def test_basic_initialization(self):
"""Test basic SyntheticReader initialization"""
reader = SyntheticReader(num_of_req=100, obj_size=1024)
assert reader.get_num_of_req() == 100
assert len(reader) == 100
def test_zipf_distribution(self):
"""Test Zipf distribution request generation"""
reader = SyntheticReader(num_of_req=1000, obj_size=1024, alpha=1.0, dist="zipf", num_objects=100, seed=42)
# Test basic properties
assert reader.get_num_of_req() == 1000
assert len(reader) == 1000
# Read some requests and verify they are valid
req = Request()
first_req = reader.read_one_req()
assert first_req.obj_id >= 0
assert first_req.obj_size == 1024
assert hasattr(first_req, "op") # Just check it has op attribute
def test_uniform_distribution(self):
"""Test uniform distribution request generation"""
reader = SyntheticReader(num_of_req=500, obj_size=512, dist="uniform", num_objects=50, seed=123)
assert reader.get_num_of_req() == 500
# Read some requests
req = Request()
for _ in range(10):
read_req = reader.read_one_req()
assert read_req.obj_size == 512
assert hasattr(read_req, "op") # Just check it has op attribute
def test_reader_iteration(self):
"""Test iteration over synthetic reader"""
reader = SyntheticReader(num_of_req=50, obj_size=1024, seed=42)
count = 0
for req in reader:
assert req.obj_size == 1024
assert hasattr(req, "op") # Just check it has op attribute
count += 1
if count >= 10: # Only test first 10 for efficiency
break
assert count == 10
def test_reader_reset(self):
"""Test reader reset functionality"""
reader = SyntheticReader(num_of_req=100, obj_size=1024, seed=42)
# Read some requests
req = Request()
first_read = reader.read_one_req()
reader.read_one_req()
reader.read_one_req()
# Reset and read again
reader.reset()
reset_read = reader.read_one_req()
# Should get the same first request after reset
assert first_read.obj_id == reset_read.obj_id
def test_skip_requests(self):
"""Test skipping requests"""
reader = SyntheticReader(num_of_req=100, obj_size=1024, seed=42)
# Skip 10 requests
skipped = reader.skip_n_req(10)
assert skipped == 10
# Verify we can still read remaining requests
req = Request()
read_req = reader.read_one_req()
assert read_req.valid == True # Should still be able to read
def test_clone_reader(self):
"""Test reader cloning"""
reader = SyntheticReader(num_of_req=100, obj_size=1024, seed=42)
# Read some requests
req = Request()
reader.read_one_req()
reader.read_one_req()
# Clone the reader
cloned_reader = reader.clone()
# Both readers should have same configuration
assert cloned_reader.get_num_of_req() == reader.get_num_of_req()
assert isinstance(cloned_reader, SyntheticReader)
def test_invalid_parameters(self):
"""Test error handling for invalid parameters"""
with pytest.raises(ValueError):
SyntheticReader(num_of_req=0) # Invalid num_of_req
with pytest.raises(ValueError):
SyntheticReader(num_of_req=100, obj_size=0) # Invalid obj_size
with pytest.raises(ValueError):
SyntheticReader(num_of_req=100, alpha=-1.0) # Invalid alpha
class TestTraceReader:
"""Test TraceReader functionality"""
def test_csv_trace_creation(self):
"""Test creating a CSV trace and reading it"""
# Create a temporary CSV trace file
with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
# Write CSV header and some sample data
f.write("timestamp,obj_id,obj_size,op\n")
f.write("1,100,1024,0\n")
f.write("2,101,2048,0\n")
f.write("3,102,512,0\n")
f.write("4,100,1024,0\n") # Repeat access
f.write("5,103,4096,0\n")
temp_file = f.name
try:
read_init_param = ReaderInitParam(
has_header=True,
delimiter=",",
obj_id_is_num=True,
)
read_init_param.time_field = 1
read_init_param.obj_id_field = 2
read_init_param.obj_size_field = 3
read_init_param.op_field = 4
# Create TraceReader
reader = TraceReader(trace=temp_file, trace_type=TraceType.CSV_TRACE, reader_init_params=read_init_param)
# Test basic properties
assert reader.get_num_of_req() == 5
assert len(reader) == 5
assert reader.trace_path == temp_file
# TODO(haocheng): check it
# assert reader.csv_has_header == True
# assert reader.csv_delimiter == ","
# Read first request
req = Request()
first_req = reader.read_one_req()
assert first_req.obj_id == 100
assert first_req.obj_size == 1024
finally:
# Clean up
os.unlink(temp_file)
def test_trace_reader_iteration(self):
"""Test iteration over trace reader"""
# Create temporary trace
with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
f.write("timestamp,obj_id,obj_size,op\n")
for i in range(10):
f.write(f"{i + 1},{100 + i},{1024 * (i + 1)},0\n")
temp_file = f.name
try:
read_init_param = ReaderInitParam(
has_header=True,
delimiter=",",
obj_id_is_num=True,
)
read_init_param.time_field = 1
read_init_param.obj_id_field = 2
read_init_param.obj_size_field = 3
read_init_param.op_field = 4
reader = TraceReader(trace=temp_file, trace_type=TraceType.CSV_TRACE, reader_init_params=read_init_param)
# Read requests one by one instead of using list()
req = Request()
first_req = reader.read_one_req()
assert first_req.obj_id == 100
assert first_req.obj_size == 1024
second_req = reader.read_one_req()
assert second_req.obj_id == 101
assert second_req.obj_size == 2048
finally:
os.unlink(temp_file)
def test_trace_reader_reset_and_skip(self):
"""Test reset and skip functionality"""
with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
f.write("timestamp,obj_id,obj_size,op\n")
for i in range(20):
f.write(f"{i + 1},{100 + i},1024,0\n")
temp_file = f.name
try:
read_init_param = ReaderInitParam(
has_header=True,
delimiter=",",
obj_id_is_num=True,
)
read_init_param.time_field = 1
read_init_param.obj_id_field = 2
read_init_param.obj_size_field = 3
read_init_param.op_field = 4
reader = TraceReader(trace=temp_file, trace_type=TraceType.CSV_TRACE, reader_init_params=read_init_param)
# Read some requests
req = Request()
first_req = reader.read_one_req()
reader.read_one_req()
# Reset and verify we get same first request
reader.reset()
reset_req = reader.read_one_req()
assert first_req.obj_id == reset_req.obj_id
# Test skip functionality
reader.reset()
# Instead of using skip_n_req which might fail, just read requests one by one
for _ in range(5):
reader.read_one_req()
next_req = reader.read_one_req()
assert next_req.obj_id == 105 # Should be 6th request (100+5)
finally:
os.unlink(temp_file)
def test_trace_reader_sampling(self):
"""Test sampling functionality"""
with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
f.write("timestamp,obj_id,obj_size,op\n")
for i in range(100):
f.write(f"{i + 1},{100 + i},1024,0\n")
temp_file = f.name
try:
# Create reader with 50% sampling
read_init_param = ReaderInitParam(
has_header=True,
delimiter=",",
obj_id_is_num=True,
)
read_init_param.time_field = 1
read_init_param.obj_id_field = 2
read_init_param.obj_size_field = 3
read_init_param.op_field = 4
sampler = Sampler(sample_ratio=0.5, type=SamplerType.SPATIAL_SAMPLER)
read_init_param.sampler = sampler
reader = TraceReader(trace=temp_file, trace_type=TraceType.CSV_TRACE, reader_init_params=read_init_param)
# Test that sampling is configured
assert reader.sampler is not None
# Read a few requests to verify it works
req = Request()
first_req = reader.read_one_req()
assert first_req.valid == True
finally:
os.unlink(temp_file)
def test_trace_reader_clone(self):
"""Test trace reader cloning"""
with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
f.write("timestamp,obj_id,obj_size,op\n")
for i in range(5):
f.write(f"{i + 1},{100 + i},1024,0\n")
temp_file = f.name
try:
read_init_param = ReaderInitParam(
has_header=True,
delimiter=",",
obj_id_is_num=True,
)
read_init_param.time_field = 1
read_init_param.obj_id_field = 2
read_init_param.obj_size_field = 3
read_init_param.op_field = 4
reader = TraceReader(trace=temp_file, trace_type=TraceType.CSV_TRACE, reader_init_params=read_init_param)
# Clone the reader
cloned_reader = reader.clone()
# Both should be TraceReader instances
assert isinstance(cloned_reader, TraceReader)
assert isinstance(reader, TraceReader)
finally:
os.unlink(temp_file)
def test_invalid_sampling_ratio(self):
"""Test error handling for invalid sampling ratio"""
with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
f.write("timestamp,obj_id,obj_size,op\n")
f.write("1,100,1024,0\n")
temp_file = f.name
try:
# Test that invalid sampling ratios are rejected by Sampler
with pytest.raises(ValueError):
Sampler(sample_ratio=1.5) # Invalid ratio > 1.0
with pytest.raises(ValueError):
Sampler(sample_ratio=-0.1) # Invalid ratio < 0.0
finally:
os.unlink(temp_file)
def test_trace_reader_s3(self):
"""Test trace reader with S3"""
URI = "s3://cache-datasets/cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"
reader = TraceReader(trace=URI)
for req in reader:
assert req.valid == True
break
class TestReaderCompatibility:
"""Test compatibility between different reader types"""
def test_protocol_compliance(self):
"""Test that both readers implement the ReaderProtocol"""
synthetic_reader = SyntheticReader(num_of_req=100, obj_size=1024)
# Create a simple CSV trace for TraceReader
with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
f.write("timestamp,obj_id,obj_size,op\n")
f.write("1,100,1024,0\n")
temp_file = f.name
try:
read_init_param = ReaderInitParam(
has_header=True,
delimiter=",",
obj_id_is_num=True,
)
read_init_param.time_field = 1
read_init_param.obj_id_field = 2
read_init_param.obj_size_field = 3
read_init_param.op_field = 4
trace_reader = TraceReader(
trace=temp_file, trace_type=TraceType.CSV_TRACE, reader_init_params=read_init_param
)
# Both should implement the same interface
readers = [synthetic_reader, trace_reader]
for reader in readers:
assert hasattr(reader, "get_num_of_req")
assert hasattr(reader, "read_one_req")
assert hasattr(reader, "reset")
assert hasattr(reader, "close")
assert hasattr(reader, "clone")
assert hasattr(reader, "__iter__")
assert hasattr(reader, "__len__")
# Test basic functionality - just check they return positive numbers
try:
num_req = reader.get_num_of_req()
assert num_req > 0
length = len(reader)
assert length > 0
except:
# Some operations might fail, just skip for safety
pass
finally:
os.unlink(temp_file)
def test_request_format_consistency(self):
"""Test that both readers produce consistent Request objects"""
synthetic_reader = SyntheticReader(num_of_req=10, obj_size=1024, seed=42)
with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
f.write("timestamp,obj_id,obj_size,op\n")
f.write("1,100,1024,0\n")
temp_file = f.name
try:
read_init_param = ReaderInitParam(
has_header=True,
delimiter=",",
obj_id_is_num=True,
)
read_init_param.time_field = 1
read_init_param.obj_id_field = 2
read_init_param.obj_size_field = 3
read_init_param.op_field = 4
trace_reader = TraceReader(
trace=temp_file, trace_type=TraceType.CSV_TRACE, reader_init_params=read_init_param
)
# Get requests from both readers
req = Request()
synthetic_req = synthetic_reader.read_one_req()
trace_req = trace_reader.read_one_req()
# Both should produce Request objects with same attributes
assert hasattr(synthetic_req, "obj_id")
assert hasattr(synthetic_req, "obj_size")
assert hasattr(synthetic_req, "op")
assert hasattr(trace_req, "obj_id")
assert hasattr(trace_req, "obj_size")
assert hasattr(trace_req, "op")
# Both should have valid values
assert synthetic_req.obj_size == 1024
assert trace_req.obj_size == 1024
assert hasattr(synthetic_req, "op")
assert hasattr(trace_req, "op")
finally:
os.unlink(temp_file)