-
Notifications
You must be signed in to change notification settings - Fork 237
Expand file tree
/
Copy pathdocarray.proto
More file actions
112 lines (86 loc) · 2.14 KB
/
docarray.proto
File metadata and controls
112 lines (86 loc) · 2.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
syntax = "proto3";
import "google/protobuf/struct.proto";
package docarray;
/**
 * A dense (possibly quantized) n-dimensional array, carried as a raw byte
 * buffer alongside its shape and element data type.
 */
message DenseNdArrayProto {
  // Raw bytes holding the array data.
  bytes buffer = 1;

  // Shape of the array: one entry per dimension.
  repeated uint32 shape = 2;

  // Data type of the array, given as a string.
  string dtype = 3;
}
/**
 * A general n-dim array. Intended to cover dense or sparse arrays; this
 * message currently declares only a dense representation.
 */
message NdArrayProto {
  // Dense representation of the ndarray.
  DenseNdArrayProto dense = 1;

  // Free-form structured parameters attached to the array.
  // NOTE(review): the expected keys are not defined in this file - confirm
  // against the code that produces/consumes this message.
  google.protobuf.Struct parameters = 2;
}
/**
 * A single key/value entry in which both the key and the value are
 * dynamically typed google.protobuf.Value instances.
 */
message KeyValuePair {
  // Key of the entry.
  google.protobuf.Value key = 1;

  // Value associated with the key.
  google.protobuf.Value value = 2;
}
/**
 * A dictionary-like value encoded as a list of KeyValuePair entries rather
 * than a protobuf map, so keys are google.protobuf.Value instances.
 */
message GenericDictValue {
  // The key/value entries.
  // NOTE(review): handling of duplicate keys is not specified here - confirm.
  repeated KeyValuePair entries = 1;
}
/**
 * A single value node. At most one member of `content` is set at a time;
 * `docarray_type` can additionally carry a type string for the node.
 */
message NodeProto {
  // The payload of the node; members are mutually exclusive.
  oneof content {
    // Text value.
    string text = 1;

    // Integer value.
    int32 integer = 2;

    // Floating-point value (stored as a double).
    double float = 3;

    // Boolean value.
    bool boolean = 4;

    // Raw bytes value.
    bytes blob = 5;

    // N-dim array, e.g. the ndarray of an image/audio/video document.
    NdArrayProto ndarray = 6;

    // Nested Document.
    DocProto doc = 7;

    // Nested DocArray (a list of Documents).
    DocListProto doc_array = 8;

    // List of arbitrary nodes.
    ListOfAnyProto list = 9;

    // Set of arbitrary nodes, carried as a ListOfAnyProto.
    ListOfAnyProto set = 10;

    // Tuple of arbitrary nodes, carried as a ListOfAnyProto.
    ListOfAnyProto tuple = 11;

    // Dictionary of arbitrary nodes with string keys.
    DictOfAnyProto dict = 12;
  }

  // Single-member oneof wrapping the type string of this node.
  oneof docarray_type {
    // NOTE(review): presumably identifies the docarray type of `content` -
    // the accepted values are not defined in this file; confirm.
    string type = 13;
  }
}
/**
 * A Document: a mapping from string keys to NodeProto values.
 */
message DocProto {
  // Contents of the document, keyed by string.
  // NOTE(review): keys are presumably the document's field names - confirm.
  map<string, NodeProto> data = 1;
}
/**
 * A dictionary with string keys whose values are arbitrary NodeProto nodes.
 */
message DictOfAnyProto {
  // The dictionary entries, keyed by string.
  map<string, NodeProto> data = 1;
}
/**
 * A sequence of arbitrary NodeProto nodes.
 */
message ListOfAnyProto {
  // The elements of the sequence.
  repeated NodeProto data = 1;
}
/**
 * A list of Documents.
 */
message DocListProto {
  // The Documents contained in the list.
  repeated DocProto docs = 1;
}
/**
 * A list whose elements are DocListProto messages (lists of Documents).
 */
message ListOfDocArrayProto {
  // The contained document lists.
  repeated DocListProto data = 1;
}
/**
 * A list whose elements are DocVecProto messages.
 */
message ListOfDocVecProto {
  // The contained DocVecProto messages.
  repeated DocVecProto data = 1;
}
/**
 * A DocVec: data grouped into named columns, with one map per kind of column.
 * Every map is keyed by a string.
 * NOTE(review): keys are presumably the column (field) names - confirm.
 */
message DocVecProto {
  // A dict of tensor columns; each value is an n-dim array.
  map<string, NdArrayProto> tensor_columns = 1;

  // A dict of document columns; each value is itself a DocVecProto.
  map<string, DocVecProto> doc_columns = 2;

  // A dict of document array columns; each value is a list of DocVecProto.
  map<string, ListOfDocVecProto> docs_vec_columns = 3;

  // A dict of "any" columns, used for the rest of the data; each value is a
  // list of arbitrary nodes.
  map<string, ListOfAnyProto> any_columns = 4;
}