// docarray.proto
// Forked from docarray/docarray.
// Schema for the `docarray` package, written in proto3.
syntax = "proto3";

package docarray;

// Provides google.protobuf.Struct, used below for free-form structured fields.
import "google/protobuf/struct.proto";
// A dense n-dimensional array (possibly quantized), carried as a raw byte
// buffer together with its shape and data type.
message DenseNdArrayProto {
  // Raw bytes holding the actual array data.
  bytes buffer = 1;

  // Shape of the array: one entry per dimension.
  repeated uint32 shape = 2;

  // Data type of the array elements, given as a string. The set of accepted
  // values is not defined in this file.
  string dtype = 3;
}
// A general n-dimensional array that is stored either densely or sparsely.
message NdArrayProto {
  // Storage representation; at most one member is set at a time.
  oneof content {
    // Dense representation of the ndarray.
    DenseNdArrayProto dense = 1;

    // Sparse representation of the ndarray.
    SparseNdArrayProto sparse = 2;
  }

  // Name of the ndarray class.
  string cls_name = 3;

  // Free-form structured parameters attached to the array.
  // NOTE(review): the expected keys are not documented in this file; confirm
  // against the code that reads and writes this field.
  google.protobuf.Struct parameters = 4;
}
// A sparse n-dimensional array, described by the coordinates of its stored
// elements, the values at those coordinates, and the overall array shape.
message SparseNdArrayProto {
  // Coordinates of the elements that contain nonzero values (zero-indexed).
  // Expected to be a 2-D integer array of shape [N, ndims]. The dtype and
  // rank travel inside the DenseNdArrayProto and are not enforced by this
  // schema.
  DenseNdArrayProto indices = 1;

  // Values of the stored elements: a 1-D array of shape [N] of any dtype,
  // supplying one value for each coordinate row in `indices`.
  DenseNdArrayProto values = 2;

  // Shape of the full sparse array, with one entry per dimension (ndims
  // entries in total). Entries are encoded as uint32, not int64.
  repeated uint32 shape = 3;
}
// A named score expressing the relevance of a document to the document
// identified by `ref_id`.
message NamedScoreProto {
  // The score value.
  float value = 1;

  // Name of the operator or score function.
  string op_name = 2;

  // Text description of the score.
  string description = 3;

  // ID of the reference document: the score is computed between the doc `id`
  // and `ref_id`.
  string ref_id = 4;
}
// A Document: one piece of content (raw bytes, an ndarray, or text) together
// with its metadata, embedding, scores, and nested chunks and matches.
message DocumentProto {
  // Unique document ID, represented as a hexdigest.
  string id = 1;

  // The document content; at most one member is set at a time.
  oneof content {
    // Raw binary content of this document. It often represents the original
    // document as it comes into jina.
    bytes buffer = 2;

    // The ndarray of an image/audio/video document.
    NdArrayProto blob = 3;

    // The content of a text document.
    string text = 4;
  }

  // Depth of the recursive chunk structure.
  uint32 granularity = 5;

  // Width of the recursive match structure.
  uint32 adjacency = 6;

  // ID of the parent document from the previous granularity.
  string parent_id = 7;

  // Weight of this document.
  float weight = 8;

  // URI of the document. It can be a local file path, a remote URL starting
  // with http or https, or a data URI.
  string uri = 9;

  // Identifier of the modality this document belongs to, in the scope of
  // multi-modal / cross-modal search.
  string modality = 10;

  // MIME type of this document. Required for `buffer` content; for other
  // content it can be guessed.
  string mime_type = 11;

  // Offset of the document.
  // NOTE(review): the reference frame of the offset is not stated in this
  // file; confirm with the producing code.
  float offset = 12;

  // Position of the document. This could be the start and end index within a
  // string, the x,y (top, left) coordinate of an image crop, or the timestamp
  // of an audio clip.
  repeated float location = 13;

  // Sub-documents of this document (recursive structure).
  repeated DocumentProto chunks = 14;

  // Matched documents on the same level (recursive structure).
  repeated DocumentProto matches = 15;

  // Embedding of this document.
  NdArrayProto embedding = 16;

  // Structured data consisting of fields that map to dynamically typed
  // values.
  google.protobuf.Struct tags = 17;

  // Scores performed on the document; each entry corresponds to a metric.
  map<string, NamedScoreProto> scores = 18;

  // Evaluations performed on the document; each entry corresponds to a
  // metric.
  map<string, NamedScoreProto> evaluations = 19;
}
// A container holding a list of Documents.
message DocumentArrayProto {
  // The Documents in this array.
  repeated DocumentProto docs = 1;
}