-
Notifications
You must be signed in to change notification settings - Fork 237
Expand file tree
/
Copy pathtest_storage.py
More file actions
104 lines (75 loc) · 2.98 KB
/
test_storage.py
File metadata and controls
104 lines (75 loc) · 2.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import numpy as np
from docarray import BaseDoc
from docarray.array import DocVec
from docarray.array.doc_vec.column_storage import ColumnStorageView
from docarray.typing import AnyTensor
def test_column_storage_init():
    """Check the tensor, any and doc columns of the storage built by a DocVec."""

    class InnerDoc(BaseDoc):
        price: int

    class MyDoc(BaseDoc):
        tensor: AnyTensor
        name: str
        doc: InnerDoc

    documents = []
    for idx in range(4):
        nested = InnerDoc(price=idx)
        documents.append(MyDoc(tensor=np.zeros(10), name='hello', doc=nested))

    column_storage = DocVec[MyDoc](documents)._storage

    # Four documents with 10-element tensors are compared to a (4, 10) zero block.
    tensor_column = column_storage.tensor_columns['tensor']
    assert (tensor_column == np.zeros((4, 10))).all()

    for stored_name in column_storage.any_columns['name']:
        assert stored_name == 'hello'

    # The nested documents are expected to come back as a DocVec whose
    # elements keep the price they were created with.
    nested_column = column_storage.doc_columns['doc']
    assert isinstance(nested_column, DocVec)
    for position, nested_doc in enumerate(nested_column):
        assert isinstance(nested_doc, InnerDoc)
        assert nested_doc.price == position
def test_column_storage_view():
    """Check reading and writing fields through a ColumnStorageView at index 0."""

    class MyDoc(BaseDoc):
        tensor: AnyTensor
        name: str

    documents = []
    for idx in range(4):
        documents.append(MyDoc(tensor=np.zeros((10, 10)), name='hello', id=str(idx)))

    column_storage = DocVec[MyDoc](documents)._storage
    row = ColumnStorageView(0, column_storage)

    # Read each field through the view.
    assert row['id'] == '0'
    assert (row['tensor'] == np.zeros(10)).all()
    assert row['name'] == 'hello'

    # Overwrite each field through the view.
    row['id'] = '1'
    row['tensor'] = np.ones(10)
    row['name'] = 'byebye'

    # The new values must be visible in the storage columns at index 0.
    any_columns = column_storage.any_columns
    assert any_columns['id'][0] == '1'
    assert (column_storage.tensor_columns['tensor'][0] == np.ones(10)).all()
    assert any_columns['name'][0] == 'byebye'
def test_column_storage_to_dict():
    """Check the dict returned by ColumnStorageView.to_dict() for index 0."""

    class MyDoc(BaseDoc):
        tensor: AnyTensor
        name: str

    documents = [
        MyDoc(tensor=np.zeros((10, 10)), name='hello', id=str(idx))
        for idx in range(4)
    ]
    column_storage = DocVec[MyDoc](documents)._storage
    row = ColumnStorageView(0, column_storage)

    as_dict = row.to_dict()

    assert as_dict['id'] == '0'

    dict_tensor = as_dict['tensor']
    assert (dict_tensor == np.zeros(10)).all()
    # The tensor in the dict may share memory with the one read from the view.
    assert np.may_share_memory(dict_tensor, row['tensor'])

    assert as_dict['name'] == 'hello'
def test_storage_view_dict_like():
    """Check keys(), values() and items() of a ColumnStorageView at index 0."""

    class MyDoc(BaseDoc):
        tensor: AnyTensor
        name: str

    documents = [
        MyDoc(tensor=np.zeros((10, 10)), name='hello', id=str(idx))
        for idx in range(4)
    ]
    column_storage = DocVec[MyDoc](documents)._storage
    row = ColumnStorageView(0, column_storage)

    assert list(row.keys()) == ['id', 'name', 'tensor']

    # A numpy array has no unambiguous truth value, so the values are
    # compared one at a time rather than as whole lists.
    expected_values = ['0', 'hello', np.zeros(10)]
    for actual, expected in zip(row.values(), expected_values):
        if not isinstance(actual, np.ndarray):
            assert actual == expected
            continue
        assert (actual == expected).all()

    expected_items = [('id', '0'), ('name', 'hello'), ('tensor', np.zeros(10))]
    for actual_item, expected_item in zip(row.items(), expected_items):
        if not isinstance(actual_item[1], np.ndarray):
            assert actual_item == expected_item
            continue
        # Array payloads: compare the key and the array contents separately.
        assert actual_item[0] == expected_item[0]
        assert (actual_item[1] == expected_item[1]).all()