Skip to content

Commit 21896ce

Browse files
committed
Add a `_placeholder_vector` field when no vector field exists in the schema, and populate it with a zero vector on writes. This is the recommended workaround per milvus-io/milvus#33853.

Signed-off-by: Chaitany Patel <patelchaitany93@gmail.com>
1 parent ece0126 commit 21896ce

2 files changed

Lines changed: 23 additions & 8 deletions

File tree

sdk/python/feast/infra/online_stores/milvus_online_store/milvus.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,18 @@ def _get_or_create_collection(
235235
max_length=field_max_length,
236236
)
237237
)
238+
has_vector_field = any(
239+
f.dtype in (DataType.FLOAT_VECTOR, DataType.BINARY_VECTOR)
240+
for f in fields
241+
)
242+
if not has_vector_field:
243+
fields.append(
244+
FieldSchema(
245+
name="_placeholder_vector",
246+
dtype=DataType.FLOAT_VECTOR,
247+
dim=1,
248+
)
249+
)
238250
schema = CollectionSchema(
239251
fields=fields, description="Feast feature view data"
240252
)
@@ -345,10 +357,13 @@ def online_write_batch(
345357
"created_ts": created_ts_int,
346358
}
347359
single_entity_record.update(values_dict)
348-
# Ensure all required fields exist, setting missing ones to empty strings
360+
# Ensure all required fields exist, setting missing ones to defaults
349361
for field in required_fields:
350362
if field not in single_entity_record:
351-
single_entity_record[field] = ""
363+
if field == "_placeholder_vector":
364+
single_entity_record[field] = [float("nan")]
365+
else:
366+
single_entity_record[field] = ""
352367
# Store only the latest event timestamp per entity
353368
if (
354369
entity_key_str not in unique_entities

sdk/python/tests/unit/online_store/test_online_retrieval.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1303,7 +1303,6 @@ def test_milvus_stored_writes_with_explode() -> None:
13031303
)
13041304

13051305
random.seed(42)
1306-
vector_length = 10
13071306
runner = CliRunner()
13081307
with runner.local_repo(
13091308
example_repo_py=get_example_repo("example_rag_feature_repo.py"),
@@ -1357,10 +1356,10 @@ def milvus_explode_feature_view(inputs: dict[str, Any]):
13571356
"Document chunking example.",
13581357
],
13591358
"vector": [
1360-
[0.1] * vector_length,
1361-
[0.2] * vector_length,
1362-
[0.3] * vector_length,
1363-
[0.4] * vector_length,
1359+
[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
1360+
[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
1361+
[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
1362+
[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
13641363
],
13651364
}
13661365
return output
@@ -1428,7 +1427,8 @@ def milvus_explode_feature_view(inputs: dict[str, Any]):
14281427
)
14291428

14301429
# Test vector search using Milvus
1431-
query_embedding = [0.1] * vector_length
1430+
# Query vector closest to doc_2/chunk-1 [0,0,1,0,...] then doc_1/chunk-2 [0,1,0,0,...]
1431+
query_embedding = [0.0, 0.3, 0.9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
14321432

14331433
# First get Milvus client and search directly
14341434
client = store._provider._online_store.client

0 commit comments

Comments
 (0)