# test_map.py - tests for docarray.utils.map (map_docs and map_docs_batch).
from typing import Generator, Optional
import pytest
from docarray import BaseDoc, DocArray
from docarray.documents import ImageDoc
from docarray.typing import ImageUrl, NdArray
from docarray.utils.map import map_docs, map_docs_batch
from tests.units.typing.test_bytes import IMAGE_PATHS
# Number of documents built by the `da` fixture and by `test_check_order`;
# the mapping tests also assert that this many documents come back.
N_DOCS = 2
def load_from_doc(d: ImageDoc) -> ImageDoc:
    """Fill ``d.tensor`` from ``d.url`` when a url is set, then return ``d``.

    Documents without a url are handed back untouched, so this function can
    be mapped over documents that only carry an id.
    """
    source = d.url
    if source is None:
        return d
    d.tensor = source.load()
    return d
@pytest.fixture()
def da():
    """Provide a ``DocArray[ImageDoc]`` of ``N_DOCS`` docs that all point at the PNG test image."""
    png_path = IMAGE_PATHS['png']
    image_docs = [ImageDoc(url=png_path) for _ in range(N_DOCS)]
    return DocArray[ImageDoc](image_docs)
@pytest.mark.parametrize('backend', ['thread', 'process'])
def test_map(da, backend):
    """Mapping ``load_from_doc`` over the fixture yields ``N_DOCS`` docs with a tensor set."""
    # Precondition: no document has a tensor before mapping.
    assert all(tensor is None for tensor in da.tensor)

    mapped = list(map_docs(da=da, func=load_from_doc, backend=backend))

    assert len(mapped) == N_DOCS
    assert all(doc.tensor is not None for doc in mapped)
def test_map_multiprocessing_lambda_func_raise_exception(da):
    """A lambda passed to ``map_docs`` with the 'process' backend must raise ``ValueError``."""
    with pytest.raises(ValueError, match='Multiprocessing does not allow'):
        mapped = map_docs(da=da, func=lambda x: x, backend='process')
        # Materialise the result inside the context, as the original call did.
        list(mapped)
def test_map_multiprocessing_local_func_raise_exception(da):
    """A function defined inside another function must raise ``ValueError`` with 'process'."""

    def local_func(x):
        # Identity function; only the fact that it is defined locally matters here.
        return x

    with pytest.raises(ValueError, match='Multiprocessing does not allow'):
        mapped = map_docs(da=da, func=local_func, backend='process')
        # Materialise the result inside the context, as the original call did.
        list(mapped)
@pytest.mark.parametrize('backend', ['thread', 'process'])
def test_check_order(backend):
    """``map_docs`` must return documents in the same order as the input array."""
    numbered_docs = [ImageDoc(id=i) for i in range(N_DOCS)]
    da = DocArray[ImageDoc](numbered_docs)

    mapped = list(map_docs(da=da, func=load_from_doc, backend=backend))

    assert len(mapped) == N_DOCS
    # Ids were assigned as 0..N_DOCS-1 and are compared as strings.
    assert [doc.id for doc in mapped] == [str(i) for i in range(N_DOCS)]
def load_from_da(da: DocArray) -> DocArray:
    """Load every document's ``url`` into its ``tensor`` and return the same ``da``.

    Unlike ``load_from_doc`` there is no ``None`` check: each document is
    expected to carry a url.
    """
    for item in da:
        loaded = item.url.load()
        item.tensor = loaded
    return da
class MyImage(BaseDoc):
    """Document type for the batch-mapping test: an image url plus an optional tensor."""

    # Declared optional; the batch test fills it through ``load_from_da``.
    tensor: Optional[NdArray]
    # Location of the image; ``load_from_da`` calls ``url.load()`` on it.
    url: ImageUrl
@pytest.mark.slow
@pytest.mark.parametrize('n_docs,batch_size', [(10, 5), (10, 8)])
@pytest.mark.parametrize('backend', ['thread', 'process'])
def test_map_docs_batch(n_docs, batch_size, backend):
    """``map_docs_batch`` returns a generator whose items are ``DocArray[MyImage]`` batches."""
    batch_type = DocArray[MyImage]
    images = [MyImage(url=IMAGE_PATHS['png']) for _ in range(n_docs)]
    da = batch_type(images)

    it = map_docs_batch(
        da=da,
        func=load_from_da,
        batch_size=batch_size,
        backend=backend,
    )

    # The call itself must be lazy: a generator, not a materialised list.
    assert isinstance(it, Generator)
    assert all(isinstance(batch, batch_type) for batch in it)