-
Notifications
You must be signed in to change notification settings - Fork 237
Expand file tree
/
Copy pathdocarray.proto
More file actions
130 lines (96 loc) · 3.68 KB
/
docarray.proto
File metadata and controls
130 lines (96 loc) · 3.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
syntax = "proto3";
import "google/protobuf/struct.proto";
package docarray;
/**
 * A dense n-dimensional array (possibly quantized).
 */
message DenseNdArrayProto {
  // Raw bytes holding the actual array data.
  bytes buffer = 1;

  // Shape of the array, i.e. its dimensions.
  repeated uint32 shape = 2;

  // Data type of the array.
  string dtype = 3;
}
/**
 * A general n-dimensional array whose content is either dense or sparse.
 */
message NdArrayProto {
  // The array payload; at most one of the members below is set.
  oneof content {
    // Dense representation of the ndarray.
    DenseNdArrayProto dense = 1;

    // Sparse representation of the ndarray.
    SparseNdArrayProto sparse = 2;
  }

  // Name of the ndarray class.
  string cls_name = 3;

  // NOTE(review): this field carries no documentation in the schema;
  // presumably free-form, dynamically typed parameters attached to the
  // array -- confirm with the producers/consumers of this message.
  google.protobuf.Struct parameters = 4;
}
/**
 * A sparse ndarray.
 */
message SparseNdArrayProto {
  // Positions of the elements of the sparse tensor that contain nonzero
  // values (zero-indexed), given as a 2-D int64 tensor of shape [N, ndims].
  DenseNdArrayProto indices = 1;

  // The value of each element listed in `indices`, given as a 1-D tensor of
  // any type and shape [N].
  DenseNdArrayProto values = 2;

  // Shape of the sparse tensor: one entry per dimension ([ndims] entries).
  // Entries are declared as uint32 in this schema.
  repeated uint32 shape = 3;
}
/**
 * A named score expressing relevance with respect to `ref_id`.
 */
message NamedScoreProto {
  // The score value.
  float value = 1;

  // Name of the operator / score function that produced the value.
  string op_name = 2;

  // Text description of the score.
  string description = 3;

  // The score is computed between the doc `id` and this `ref_id`.
  string ref_id = 4;
}
/**
 * A Document.
 */
message DocumentProto {
  // A hexdigest that serves as the unique ID of the document.
  string id = 1;

  // The document content; at most one of the members below is set.
  oneof content {
    // Raw binary content of the document, which often represents the
    // original document as it comes into jina.
    bytes blob = 2;

    // The ndarray of an image/audio/video document.
    NdArrayProto tensor = 3;

    // The content of a text document.
    string text = 4;
  }

  // Depth of the recursive chunk structure.
  uint32 granularity = 5;

  // Width of the recursive match structure.
  uint32 adjacency = 6;

  // ID of the parent, taken from the previous granularity.
  string parent_id = 7;

  // Weight of this document.
  float weight = 8;

  // URI of the document. Can be a local file path, a remote URL starting
  // with http or https, or a data URI.
  string uri = 9;

  // Identifier of the modality this document belongs to, in the scope of
  // multi/cross-modal search.
  string modality = 10;

  // MIME type of this document. Required for buffer content; for other
  // contents it can be guessed.
  // NOTE(review): "buffer content" presumably refers to the `blob` member
  // of `content` -- confirm.
  string mime_type = 11;

  // Offset of the document.
  float offset = 12;

  // Position of the document. Could be the start and end index of a string,
  // the x,y (top, left) coordinate of an image crop, or the timestamp of an
  // audio clip.
  repeated float location = 13;

  // Sub-documents of this document (recursive structure).
  repeated DocumentProto chunks = 14;

  // Matched documents on the same level (recursive structure).
  repeated DocumentProto matches = 15;

  // Embedding of this document.
  NdArrayProto embedding = 16;

  // Structured data value consisting of fields that map to dynamically
  // typed values.
  google.protobuf.Struct tags = 17;

  // Scores computed on the document; each entry corresponds to a metric.
  map<string, NamedScoreProto> scores = 18;

  // Evaluations performed on the document; each entry corresponds to a
  // metric.
  map<string, NamedScoreProto> evaluations = 19;

  // System-defined meta attributes, represented as a structured data value.
  google.protobuf.Struct _metadata = 20;
}
/**
 * A list of Documents.
 */
message DocumentArrayProto {
  // The Documents held by this array.
  repeated DocumentProto docs = 1;
}