-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdatabase_interface.py
More file actions
121 lines (98 loc) · 3.1 KB
/
database_interface.py
File metadata and controls
121 lines (98 loc) · 3.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
"""Abstract base class for vector database."""
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional
class DatabaseInterface(ABC):
    """Abstract interface for vector databases.

    Concrete backends subclass this and implement every abstract method
    declared here; the interface itself cannot be instantiated.
    """

    @abstractmethod
    def initialize(
        self,
        collection_name: str,
        vector_size: int = 384,
        model_name: Optional[str] = None,
    ) -> Optional[str]:
        """Initialize or get a collection in the database.

        Args:
            collection_name: Name of the collection to initialize.
            vector_size: Dimension of the embedding vectors
                (default: 384 for all-MiniLM-L6-v2).
            model_name: Name of the embedding model used to create the
                vectors.

        Returns:
            None if initialization succeeded with the requested parameters,
            or the stored model name if there's a dimension mismatch (the
            caller should reload with this model).
        """

    @abstractmethod
    def get_model_name(self) -> Optional[str]:
        """Get the model name stored in the collection metadata.

        Returns:
            The model name if stored, None otherwise.
        """

    @abstractmethod
    def add(
        self,
        ids: List[str],
        embeddings: List[List[float]],
        documents: List[str],
        # Explicit Optional: the default is None, so the annotation must
        # admit None (implicit Optional is rejected by strict type checkers).
        metadatas: Optional[List[Dict[str, Any]]] = None,
    ) -> None:
        """Add documents with embeddings to the database.

        Args:
            ids: Unique identifiers for the documents.
            embeddings: Vector embeddings for the documents.
            documents: The actual document contents.
            metadatas: Optional metadata for each document; may be omitted
                (None).
        """

    @abstractmethod
    def query(self, embedding: List[float], n_results: int = 5) -> Dict[str, Any]:
        """Query the database with an embedding vector.

        Args:
            embedding: The query embedding vector.
            n_results: Number of results to return.

        Returns:
            Query results containing distances and documents.
        """

    @abstractmethod
    def delete_collection(self, collection_name: str) -> None:
        """Delete a collection from the database.

        Args:
            collection_name: Name of the collection to delete.
        """

    @abstractmethod
    def delete_by_ids(self, ids: List[str]) -> None:
        """Delete documents by their IDs.

        Args:
            ids: List of document IDs to delete.
        """

    @abstractmethod
    def get_all_ids(self) -> List[str]:
        """Get all document IDs in the collection.

        Returns:
            List of all document IDs.
        """

    @abstractmethod
    def get_ids_by_file(self, file_path: str) -> List[str]:
        """Get all document IDs for a specific file.

        Args:
            file_path: Path to the file.

        Returns:
            List of document IDs for that file.
        """

    @abstractmethod
    def close(self) -> None:
        """Close the database connection."""