-
Notifications
You must be signed in to change notification settings - Fork 244
Expand file tree
/
Copy pathaudio.py
More file actions
119 lines (83 loc) · 3.24 KB
/
Copy pathaudio.py
File metadata and controls
119 lines (83 loc) · 3.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
from typing import Any, Optional, Type, TypeVar, Union
import numpy as np
from docarray.base_doc import BaseDoc
from docarray.typing import AnyEmbedding, AudioUrl
from docarray.typing.bytes.audio_bytes import AudioBytes
from docarray.typing.tensor.abstract_tensor import AbstractTensor
from docarray.typing.tensor.audio.audio_tensor import AudioTensor
from docarray.utils._internal.misc import is_tf_available, is_torch_available
torch_available = is_torch_available()
if torch_available:
import torch
tf_available = is_tf_available()
if tf_available:
import tensorflow as tf # type: ignore
T = TypeVar('T', bound='AudioDoc')
class AudioDoc(BaseDoc):
"""
Document for handling audios.
The Audio Document can contain an AudioUrl (`AudioDoc.url`), an AudioTensor
(`AudioDoc.tensor`), and an AnyEmbedding (`AudioDoc.embedding`).
EXAMPLE USAGE:
You can use this Document directly:
.. code-block:: python
from docarray.documents import AudioDoc
# use it directly
audio = Audio(
url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/hello.wav?raw=true'
)
audio.tensor, audio.frame_rate = audio.url.load()
model = MyEmbeddingModel()
audio.embedding = model(audio.tensor)
You can extend this Document:
.. code-block:: python
from docarray.documents import AudioDoc, TextDoc
from typing import Optional
# extend it
class MyAudio(Audio):
name: Optional[Text]
audio = MyAudio(
url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/hello.wav?raw=true'
)
audio.tensor, audio.frame_rate = audio.url.load()
model = MyEmbeddingModel()
audio.embedding = model(audio.tensor)
audio.name = Text(text='my first audio')
You can use this Document for composition:
.. code-block:: python
from docarray import BaseDoc
from docarray.documents import AudioDoc, TextDoc
# compose it
class MultiModalDoc(Document):
audio: Audio
text: Text
mmdoc = MultiModalDoc(
audio=Audio(
url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/hello.wav?raw=true'
),
text=Text(text='hello world, how are you doing?'),
)
mmdoc.audio.tensor, mmdoc.audio.frame_rate = mmdoc.audio.url.load()
# equivalent to
mmdoc.audio.bytes_ = mmdoc.audio.url.load_bytes()
mmdoc.audio.tensor, mmdoc.audio.frame_rate = mmdoc.audio.bytes.load()
"""
url: Optional[AudioUrl]
tensor: Optional[AudioTensor]
embedding: Optional[AnyEmbedding]
bytes_: Optional[AudioBytes]
frame_rate: Optional[int]
@classmethod
def validate(
cls: Type[T],
value: Union[str, AbstractTensor, Any],
) -> T:
if isinstance(value, str):
value = cls(url=value)
elif isinstance(value, (AbstractTensor, np.ndarray)) or (
torch_available
and isinstance(value, torch.Tensor)
or (tf_available and isinstance(value, tf.Tensor))
):
value = cls(tensor=value)
return super().validate(value)