-
Notifications
You must be signed in to change notification settings - Fork 374
Expand file tree
/
Copy pathmilvus_example.py
More file actions
138 lines (107 loc) · 4.54 KB
/
Copy pathmilvus_example.py
File metadata and controls
138 lines (107 loc) · 4.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import json
from datetime import datetime, timezone

import numpy as np
from pymilvus import Collection, CollectionSchema, DataType, FieldSchema, connections, utility
from testcontainers.community.milvus import MilvusContainer
def basic_example():
    """Run an end-to-end Milvus walkthrough against a throwaway container.

    Starts a ``MilvusContainer``, connects to it under the ``"default"``
    connection alias, and then, in order: creates a collection, builds an
    index on its vector field, inserts five sample rows, flushes and loads
    the collection, runs a vector search and a filtered query, prints
    collection statistics, creates/lists/drops a partition, and drops the
    collection. Progress and results are printed to stdout.

    Returns:
        None.
    """
    with MilvusContainer() as milvus:
        # Get connection parameters
        host = milvus.get_container_host_ip()
        port = milvus.get_exposed_port(milvus.port)

        # Connect to Milvus
        connections.connect(alias="default", host=host, port=port)
        print("Connected to Milvus")

        # Everything after connect() runs under try/finally so the "default"
        # alias is released even when one of the steps below raises.
        try:
            # Create collection
            collection_name = "test_collection"
            dim = 128
            fields = [
                FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
                FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=dim),
                FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=500),
                FieldSchema(name="category", dtype=DataType.VARCHAR, max_length=100),
                FieldSchema(name="tags", dtype=DataType.JSON),
                FieldSchema(name="timestamp", dtype=DataType.VARCHAR, max_length=50),
            ]
            schema = CollectionSchema(fields=fields, description="Test collection")
            collection = Collection(name=collection_name, schema=schema)
            print(f"Created collection: {collection_name}")

            # Create index
            index_params = {"metric_type": "COSINE", "index_type": "IVF_FLAT", "params": {"nlist": 1024}}
            collection.create_index(field_name="vector", index_params=index_params)
            print("Created index on vector field")

            # Generate test data
            num_entities = 5
            vectors = np.random.rand(num_entities, dim).tolist()
            texts = [
                "AI and machine learning are transforming industries",
                "New study reveals benefits of meditation",
                "Global warming reaches critical levels",
                "Stock market shows strong growth",
                "New restaurant opens in downtown",
            ]
            categories = ["Technology", "Health", "Environment", "Finance", "Food"]
            tags = [
                ["AI", "ML", "innovation"],
                ["wellness", "mental health"],
                ["climate", "sustainability"],
                ["investing", "markets"],
                ["dining", "local"],
            ]
            # Timezone-aware UTC timestamps; datetime.utcnow() is deprecated
            # and yields naive values. The ISO string stays under max_length=50.
            timestamps = [datetime.now(timezone.utc).isoformat() for _ in range(num_entities)]

            # Insert data. Columns follow schema order; "id" is omitted
            # because the primary key is declared with auto_id=True.
            entities = [vectors, texts, categories, tags, timestamps]
            collection.insert(entities)
            print("Inserted test data")

            # Flush collection
            collection.flush()
            print("Flushed collection")

            # Load collection
            collection.load()
            print("Loaded collection")

            # Scalar fields returned by both the search and the query below.
            output_fields = ["text", "category", "tags"]

            # Search vectors
            search_params = {"metric_type": "COSINE", "params": {"nprobe": 10}}
            results = collection.search(
                data=[vectors[0]],
                anns_field="vector",
                param=search_params,
                limit=3,
                output_fields=output_fields,
            )
            print("\nSearch results:")
            for hits in results:
                for hit in hits:
                    # hit.entity is a pymilvus result object rather than a
                    # plain JSON type, so copy the requested fields into a
                    # dict before handing the record to json.dumps.
                    entity = {field: hit.entity.get(field) for field in output_fields}
                    print(json.dumps({"id": hit.id, "distance": hit.distance, "entity": entity}, indent=2))

            # Query with filter
            filter_expr = 'category == "Technology"'
            query_results = collection.query(expr=filter_expr, output_fields=output_fields)
            print("\nQuery results with filter:")
            print(json.dumps(query_results, indent=2))

            # Get collection stats
            # NOTE(review): confirm Collection.get_statistics() exists in the
            # pinned pymilvus version; collection.num_entities is the
            # documented row-count accessor if it does not.
            stats = collection.get_statistics()
            print("\nCollection statistics:")
            print(json.dumps(stats, indent=2))

            # Create partition
            partition_name = "test_partition"
            collection.create_partition(partition_name)
            print(f"\nCreated partition: {partition_name}")

            # List partitions
            partitions = collection.partitions
            print("\nPartitions:")
            for partition in partitions:
                print(
                    json.dumps(
                        {
                            "name": partition.name,
                            "is_empty": partition.is_empty,
                            "num_entities": partition.num_entities,
                        },
                        indent=2,
                    )
                )

            # Delete partition
            collection.drop_partition(partition_name)
            print(f"Deleted partition: {partition_name}")

            # Clean up
            utility.drop_collection(collection_name)
            print("\nDropped collection")
        finally:
            # Disconnect
            connections.disconnect("default")
            print("Disconnected from Milvus")
if __name__ == "__main__":
    # Script entry point: run the walkthrough only when this file is executed
    # directly, not when it is imported as a module.
    basic_example()