-
Notifications
You must be signed in to change notification settings - Fork 237
Expand file tree
/
Copy pathtest_array_from_to_csv.py
More file actions
166 lines (120 loc) · 4.46 KB
/
test_array_from_to_csv.py
File metadata and controls
166 lines (120 loc) · 4.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import os
from typing import Optional
import pytest
from docarray import BaseDoc, DocList, DocVec
from docarray.documents import ImageDoc
from tests import TOYDATA_DIR
@pytest.fixture()
def nested_doc_cls():
class MyDoc(BaseDoc):
count: Optional[int] = None
text: str
class MyDocNested(MyDoc):
image: ImageDoc
image2: ImageDoc
return MyDocNested
def test_to_from_csv(tmpdir, nested_doc_cls):
da = DocList[nested_doc_cls](
[
nested_doc_cls(
count=0,
text='hello',
image=ImageDoc(url='aux.png'),
image2=ImageDoc(url='aux.png'),
),
nested_doc_cls(text='hello world', image=ImageDoc(), image2=ImageDoc()),
]
)
tmp_file = str(tmpdir / 'tmp.csv')
da.to_csv(tmp_file)
assert os.path.isfile(tmp_file)
da_from = DocList[nested_doc_cls].from_csv(tmp_file)
assert isinstance(da_from, DocList)
for doc1, doc2 in zip(da, da_from):
assert doc1 == doc2
def test_from_csv_nested(nested_doc_cls):
da = DocList[nested_doc_cls].from_csv(
file_path=str(TOYDATA_DIR / 'docs_nested.csv')
)
assert isinstance(da, DocList)
assert len(da) == 3
for i, doc in enumerate(da):
assert doc.count.__class__ == int
assert doc.count == int(f'{i}{i}{i}')
assert doc.text.__class__ == str
assert doc.text == f'hello {i}'
assert doc.image.__class__ == ImageDoc
assert doc.image.tensor is None
assert doc.image.embedding is None
assert doc.image.bytes_ is None
assert doc.image2.__class__ == ImageDoc
assert doc.image2.tensor is None
assert doc.image2.embedding is None
assert doc.image2.bytes_ is None
assert da[0].image2.url == 'image_10.png'
assert da[1].image2.url is None
assert da[2].image2.url is None
@pytest.fixture()
def nested_doc():
class Inner(BaseDoc):
img: Optional[ImageDoc] = None
class Middle(BaseDoc):
img: Optional[ImageDoc] = None
inner: Optional[Inner] = None
class Outer(BaseDoc):
img: Optional[ImageDoc] = None
middle: Optional[Middle] = None
doc = Outer(
img=ImageDoc(), middle=Middle(img=ImageDoc(), inner=Inner(img=ImageDoc()))
)
return doc
def test_from_csv_without_schema_raise_exception():
with pytest.raises(TypeError, match='no document schema defined'):
DocList.from_csv(file_path=str(TOYDATA_DIR / 'docs_nested.csv'))
def test_from_csv_with_wrong_schema_raise_exception(nested_doc):
with pytest.raises(ValueError, match='Column names do not match the schema'):
DocList[nested_doc.__class__].from_csv(file_path=str(TOYDATA_DIR / 'docs.csv'))
def test_from_remote_csv_file():
remote_url = 'https://github.com/docarray/docarray/blob/main/tests/toydata/books.csv?raw=true'
class Book(BaseDoc):
title: str
author: str
year: int
books = DocList[Book].from_csv(file_path=remote_url)
assert isinstance(books, DocList)
assert len(books) == 3
def test_doc_list_error(tmpdir):
class Book(BaseDoc):
title: str
# not testing DocVec bc it already fails here (as it should!)
docs = DocList([Book(title='hello'), Book(title='world')])
tmp_file = str(tmpdir / 'tmp.csv')
with pytest.raises(TypeError):
docs.to_csv(tmp_file)
def test_union_type_error(tmp_path):
from typing import Union
from docarray.documents import TextDoc
class CustomDoc(BaseDoc):
ud: Union[TextDoc, ImageDoc] = TextDoc(text='union type')
docs = DocList[CustomDoc]([CustomDoc(ud=TextDoc(text='union type'))])
with pytest.raises(ValueError):
docs.to_csv(str(tmp_path) + ".csv")
DocList[CustomDoc].from_csv(str(tmp_path) + ".csv")
class BasisUnion(BaseDoc):
ud: Union[int, str]
docs_basic = DocList[BasisUnion]([BasisUnion(ud="hello")])
docs_basic.to_csv(str(tmp_path) + ".csv")
docs_copy = DocList[BasisUnion].from_csv(str(tmp_path) + ".csv")
assert docs_copy == docs_basic
def test_to_from_csv_docvec_raises():
class Book(BaseDoc):
title: str
author: str
year: int
books = DocVec[Book](
[Book(title='It\'s me, hi', author='I\'m the problem it\'s me', year=2022)]
)
with pytest.raises(NotImplementedError):
books.to_csv('dummy/file/path')
with pytest.raises(NotImplementedError):
DocVec[Book].from_csv('dummy/file/path')