Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion docarray/base_doc/doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,11 @@ class MyDoc(BaseDoc):
https://docs.pydantic.dev/usage/models/) and can be used in a similar way.
"""

id: Optional[ID] = Field(default_factory=lambda: ID(os.urandom(16).hex()))
id: Optional[ID] = Field(
description='The ID of the BaseDoc. This is useful for indexing in vector stores. If not set by user, it will automatically be assigned a random value',
default_factory=lambda: ID(os.urandom(16).hex()),
example=os.urandom(16).hex(),
)

class Config:
json_loads = orjson.loads
Expand Down
23 changes: 13 additions & 10 deletions docarray/index/backends/elastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@


class ElasticDocIndex(BaseDocIndex, Generic[TSchema]):
_index_vector_params: Optional[Tuple[str]] = ('dims', 'similarity', 'index')
_index_vector_options: Optional[Tuple[str]] = ('m', 'ef_construction')

def __init__(self, db_config=None, **kwargs):
"""Initialize ElasticDocIndex"""
super().__init__(db_config=db_config, **kwargs)
Expand All @@ -82,9 +85,6 @@ def __init__(self, db_config=None, **kwargs):
self._logger.debug('ElasticSearch client has been created')

# ElasticSearch index setup
self._index_vector_params = ('dims', 'similarity', 'index')
self._index_vector_options = ('m', 'ef_construction')

mappings: Dict[str, Any] = {
'dynamic': True,
'_source': {'enabled': 'true'},
Expand Down Expand Up @@ -572,20 +572,23 @@ def _filter_by_parent_id(self, id: str) -> List[str]:
# Helpers #
###############################################

def _create_index_mapping(self, col: '_ColumnInfo') -> Dict[str, Any]:
@classmethod
def _create_index_mapping(cls, col: '_ColumnInfo') -> Dict[str, Any]:
"""Create a new HNSW index for a column, and initialize it."""

index = {'type': col.config['type'] if 'type' in col.config else col.db_type}

if col.db_type == 'dense_vector':
for k in self._index_vector_params:
index[k] = col.config[k]
if cls._index_vector_params is not None:
for k in cls._index_vector_params:
index[k] = col.config[k]
if col.n_dim:
index['dims'] = col.n_dim
index['index_options'] = dict(
(k, col.config[k]) for k in self._index_vector_options
)
index['index_options']['type'] = 'hnsw'
if cls._index_vector_options is not None:
index['index_options'] = dict(
(k, col.config[k]) for k in cls._index_vector_options
)
index['index_options']['type'] = 'hnsw'
return index

def _send_requests(
Expand Down
20 changes: 5 additions & 15 deletions docarray/index/backends/elasticv7.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import warnings
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Sequence, TypeVar, Union
from typing import Any, Dict, List, Optional, Sequence, TypeVar, Union, Tuple

import numpy as np
from pydantic import parse_obj_as

from docarray import BaseDoc
from docarray.index import ElasticDocIndex
from docarray.index.abstract import BaseDocIndex, _ColumnInfo
from docarray.index.abstract import BaseDocIndex
from docarray.typing import AnyTensor
from docarray.typing.tensor.ndarray import NdArray
from docarray.utils.find import _FindResult
Expand All @@ -17,6 +17,9 @@


class ElasticV7DocIndex(ElasticDocIndex):
_index_vector_params: Optional[Tuple[str]] = ('dims',)
_index_vector_options: Optional[Tuple[str]] = None

def __init__(self, db_config=None, **kwargs):
"""Initialize ElasticV7DocIndex"""
from elasticsearch import __version__ as __es__version__
Expand Down Expand Up @@ -130,19 +133,6 @@ def execute_query(self, query: Dict[str, Any], *args, **kwargs) -> Any:
# Helpers #
###############################################

# ElasticSearch helpers
def _create_index_mapping(self, col: '_ColumnInfo') -> Dict[str, Any]:
"""Create a new HNSW index for a column, and initialize it."""

index = col.config.copy()
if 'type' not in index:
index['type'] = col.db_type

if col.db_type == 'dense_vector' and col.n_dim:
index['dims'] = col.n_dim

return index

def _form_search_body(self, query: np.ndarray, limit: int, search_field: str = '') -> Dict[str, Any]: # type: ignore
body = {
'size': limit,
Expand Down
2 changes: 1 addition & 1 deletion docarray/index/backends/hnswlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def __init__(self, db_config=None, **kwargs):
sub_docs_exist = True
if safe_issubclass(col.docarray_type, AnyDocArray):
continue
if not col.config:
if not col.config or 'dim' not in col.config:
# non-tensor type; don't create an index
continue
if not load_existing and (
Expand Down
16 changes: 8 additions & 8 deletions tests/index/base_classes/test_base_doc_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def test_create_columns():
assert index._column_infos['id'].docarray_type == ID
assert index._column_infos['id'].db_type == str
assert index._column_infos['id'].n_dim is None
assert index._column_infos['id'].config == {'hi': 'there'}
assert index._column_infos['id'].config['hi'] == 'there'

assert issubclass(index._column_infos['tens'].docarray_type, AbstractTensor)
assert index._column_infos['tens'].db_type == str
Expand All @@ -171,7 +171,7 @@ def test_create_columns():
assert index._column_infos['id'].docarray_type == ID
assert index._column_infos['id'].db_type == str
assert index._column_infos['id'].n_dim is None
assert index._column_infos['id'].config == {'hi': 'there'}
assert index._column_infos['id'].config['hi'] == 'there'

assert issubclass(index._column_infos['tens_one'].docarray_type, AbstractTensor)
assert index._column_infos['tens_one'].db_type == str
Expand All @@ -190,7 +190,7 @@ def test_create_columns():
assert index._column_infos['id'].docarray_type == ID
assert index._column_infos['id'].db_type == str
assert index._column_infos['id'].n_dim is None
assert index._column_infos['id'].config == {'hi': 'there'}
assert index._column_infos['id'].config['hi'] == 'there'

assert issubclass(index._column_infos['d__tens'].docarray_type, AbstractTensor)
assert index._column_infos['d__tens'].db_type == str
Expand All @@ -214,7 +214,7 @@ def test_create_columns():
assert index._subindices['d']._column_infos['id'].docarray_type == ID
assert index._subindices['d']._column_infos['id'].db_type == str
assert index._subindices['d']._column_infos['id'].n_dim is None
assert index._subindices['d']._column_infos['id'].config == {'hi': 'there'}
assert index._subindices['d']._column_infos['id'].config['hi'] == 'there'

assert issubclass(
index._subindices['d']._column_infos['tens'].docarray_type, AbstractTensor
Expand Down Expand Up @@ -262,10 +262,10 @@ def test_create_columns():
assert (
index._subindices['d_root']._subindices['d']._column_infos['id'].n_dim is None
)
assert index._subindices['d_root']._subindices['d']._column_infos['id'].config == {
'hi': 'there'
}

assert (
index._subindices['d_root']._subindices['d']._column_infos['id'].config['hi']
== 'there'
)
assert issubclass(
index._subindices['d_root']
._subindices['d']
Expand Down