forked from docarray/docarray
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfilter.py
More file actions
83 lines (68 loc) · 2.49 KB
/
filter.py
File metadata and controls
83 lines (68 loc) · 2.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
__all__ = ['filter_docs']
import json
from typing import Dict, List, Union
from docarray.array.any_array import AnyDocArray
from docarray.array.doc_list.doc_list import DocList
def filter_docs(
    docs: AnyDocArray,
    query: Union[str, Dict, List[Dict]],
) -> AnyDocArray:
    """
    Select the Documents of `docs` that satisfy the given filter query.

    Filter queries use the same syntax as the MongoDB query language (https://www.mongodb.com/docs/manual/tutorial/query-documents/#specify-conditions-using-query-operators).
    You can see a list of the supported operators here (https://www.mongodb.com/docs/manual/reference/operator/query/#std-label-query-selectors)

    A query given as a string is decoded with `json.loads` before it is parsed.
    A falsy query (for example `None`, `''` or `{}`) applies no filtering at
    all: `docs` itself is returned, not a copy.

    ---

    ```python
    from docarray import DocList, BaseDoc
    from docarray.documents import TextDoc, ImageDoc
    from docarray.utils.filter import filter_docs


    class MyDocument(BaseDoc):
        caption: TextDoc
        ImageDoc: ImageDoc
        price: int


    docs = DocList[MyDocument](
        [
            MyDocument(
                caption='A tiger in the jungle',
                ImageDoc=ImageDoc(url='tigerphoto.png'),
                price=100,
            ),
            MyDocument(
                caption='A swimming turtle',
                ImageDoc=ImageDoc(url='turtlepic.png'),
                price=50,
            ),
            MyDocument(
                caption='A couple birdwatching with binoculars',
                ImageDoc=ImageDoc(url='binocularsphoto.png'),
                price=30,
            ),
        ]
    )
    query = {
        '$and': {
            'ImageDoc__url': {'$regex': 'photo'},
            'price': {'$lte': 50},
        }
    }

    results = filter_docs(docs, query)
    assert len(results) == 1
    assert results[0].price == 30
    assert results[0].caption == 'A couple birdwatching with binoculars'
    assert results[0].ImageDoc.url == 'binocularsphoto.png'
    ```

    ---

    :param docs: the document array to filter; it is iterated once and its
        `doc_type` is used to parametrize the returned `DocList`
    :param query: the filter, either as an already-parsed object or as a
        JSON string
    :return: a new `DocList[docs.doc_type]` holding the Documents of `docs`
        for which the query evaluates truthy, or `docs` itself when `query`
        is falsy
    """
    # Function-scope import, performed on every call before the query is
    # inspected (kept in this position to preserve the original behaviour).
    from docarray.utils._internal.query_language.query_parser import QueryParser

    # Nothing to filter by: hand the input back untouched (same object).
    if not query:
        return docs

    # Accept JSON text as a convenience and turn it into the parsed form.
    if isinstance(query, str):
        query = json.loads(query)

    matcher = QueryParser(query)
    # Build the DocList class specialised for the input's document type.
    typed_doc_list = DocList.__class_getitem__(docs.doc_type)
    return typed_doc_list(doc for doc in docs if matcher.evaluate(doc))