Skip to content

Commit f2cd351

Browse files
authored
Merge pull request #1 from pgvector/master
Add new base repo updates
2 parents 3c51f39 + 1905422 commit f2cd351

38 files changed

+837
-343
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,3 +5,4 @@ venv/
55
.cache/
66
*.pyc
77
__pycache__
8+
.pytest_cache/

CHANGELOG.md

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,13 @@
1-
## 0.2.6 (unreleased)
1+
## 0.3.0 (unreleased)
22

3+
- Added support for `halfvec` and `sparsevec` types to Django
4+
- Added support for `halfvec` and `sparsevec` types to SQLAlchemy and SQLModel
35
- Added support for `halfvec` and `sparsevec` types to Psycopg 3
6+
- Added support for `halfvec` and `sparsevec` types to Psycopg 2
7+
- Added support for `halfvec` and `sparsevec` types to asyncpg
8+
- Added support for `halfvec` and `sparsevec` types to Peewee
9+
- Added `L1Distance` for Django
10+
- Added `l1_distance` for SQLAlchemy, SQLModel, and Peewee
411

512
## 0.2.5 (2024-02-07)
613

@@ -35,8 +42,8 @@
3542

3643
## 0.1.7 (2023-05-11)
3744

38-
- Added `register_vector_async` for psycopg3
39-
- Fixed `set_types` for psycopg3
45+
- Added `register_vector_async` for Psycopg 3
46+
- Fixed `set_types` for Psycopg 3
4047

4148
## 0.1.6 (2022-05-22)
4249

@@ -49,12 +56,12 @@
4956

5057
## 0.1.4 (2021-10-12)
5158

52-
- Updated psycopg3 integration for 3.0 release (no longer experimental)
59+
- Updated Psycopg 3 integration for 3.0 release (no longer experimental)
5360

5461
## 0.1.3 (2021-06-22)
5562

5663
- Added support for asyncpg
57-
- Added experimental support for psycopg3
64+
- Added experimental support for Psycopg 3
5865

5966
## 0.1.2 (2021-06-13)
6067

LICENSE.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
The MIT License (MIT)
22

3-
Copyright (c) 2021-2023 Andrew Kane
3+
Copyright (c) 2021-2024 Andrew Kane
44

55
Permission is hereby granted, free of charge, to any person obtaining a copy
66
of this software and associated documentation files (the "Software"), to deal

pgvector/asyncpg/__init__.py

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,26 @@
1-
from ..utils import from_db, from_db_binary, to_db, to_db_binary
1+
from ..utils import Vector, HalfVec, SparseVec
22

33
__all__ = ['register_vector']
44

55

66
async def register_vector(conn):
77
await conn.set_type_codec(
88
'vector',
9-
encoder=to_db_binary,
10-
decoder=from_db_binary,
9+
encoder=Vector.to_db_binary,
10+
decoder=Vector.from_db_binary,
11+
format='binary'
12+
)
13+
14+
await conn.set_type_codec(
15+
'halfvec',
16+
encoder=HalfVec.to_db_binary,
17+
decoder=HalfVec.from_db_binary,
18+
format='binary'
19+
)
20+
21+
await conn.set_type_codec(
22+
'sparsevec',
23+
encoder=SparseVec.to_db_binary,
24+
decoder=SparseVec.from_db_binary,
1125
format='binary'
1226
)

pgvector/django/__init__.py

Lines changed: 9 additions & 130 deletions
Original file line number | Diff line number | Diff line change
@@ -1,130 +1,9 @@
1-
from django.contrib.postgres.operations import CreateExtension
2-
from django.contrib.postgres.indexes import PostgresIndex
3-
from django.db.models import Field, FloatField, Func, Value
4-
import numpy as np
5-
from .forms import VectorFormField
6-
from ..utils import from_db, to_db
7-
8-
__all__ = ['VectorExtension', 'VectorField', 'IvfflatIndex', 'HnswIndex', 'L2Distance', 'MaxInnerProduct', 'CosineDistance']
9-
10-
11-
class VectorExtension(CreateExtension):
12-
def __init__(self):
13-
self.name = 'vector'
14-
15-
16-
# https://docs.djangoproject.com/en/4.2/howto/custom-model-fields/
17-
class VectorField(Field):
18-
description = 'Vector'
19-
empty_strings_allowed = False
20-
21-
def __init__(self, *args, dimensions=None, **kwargs):
22-
self.dimensions = dimensions
23-
super().__init__(*args, **kwargs)
24-
25-
def deconstruct(self):
26-
name, path, args, kwargs = super().deconstruct()
27-
if self.dimensions is not None:
28-
kwargs['dimensions'] = self.dimensions
29-
return name, path, args, kwargs
30-
31-
def db_type(self, connection):
32-
if self.dimensions is None:
33-
return 'vector'
34-
return 'vector(%d)' % self.dimensions
35-
36-
def from_db_value(self, value, expression, connection):
37-
return from_db(value)
38-
39-
def to_python(self, value):
40-
if isinstance(value, list):
41-
return np.array(value, dtype=np.float32)
42-
return from_db(value)
43-
44-
def get_prep_value(self, value):
45-
return to_db(value)
46-
47-
def value_to_string(self, obj):
48-
return self.get_prep_value(self.value_from_object(obj))
49-
50-
def validate(self, value, model_instance):
51-
if isinstance(value, np.ndarray):
52-
value = value.tolist()
53-
super().validate(value, model_instance)
54-
55-
def run_validators(self, value):
56-
if isinstance(value, np.ndarray):
57-
value = value.tolist()
58-
super().run_validators(value)
59-
60-
def formfield(self, **kwargs):
61-
return super().formfield(form_class=VectorFormField, **kwargs)
62-
63-
64-
class IvfflatIndex(PostgresIndex):
65-
suffix = 'ivfflat'
66-
67-
def __init__(self, *expressions, lists=None, **kwargs):
68-
self.lists = lists
69-
super().__init__(*expressions, **kwargs)
70-
71-
def deconstruct(self):
72-
path, args, kwargs = super().deconstruct()
73-
if self.lists is not None:
74-
kwargs['lists'] = self.lists
75-
return path, args, kwargs
76-
77-
def get_with_params(self):
78-
with_params = []
79-
if self.lists is not None:
80-
with_params.append('lists = %d' % self.lists)
81-
return with_params
82-
83-
84-
class HnswIndex(PostgresIndex):
85-
suffix = 'hnsw'
86-
87-
def __init__(self, *expressions, m=None, ef_construction=None, **kwargs):
88-
self.m = m
89-
self.ef_construction = ef_construction
90-
super().__init__(*expressions, **kwargs)
91-
92-
def deconstruct(self):
93-
path, args, kwargs = super().deconstruct()
94-
if self.m is not None:
95-
kwargs['m'] = self.m
96-
if self.ef_construction is not None:
97-
kwargs['ef_construction'] = self.ef_construction
98-
return path, args, kwargs
99-
100-
def get_with_params(self):
101-
with_params = []
102-
if self.m is not None:
103-
with_params.append('m = %d' % self.m)
104-
if self.ef_construction is not None:
105-
with_params.append('ef_construction = %d' % self.ef_construction)
106-
return with_params
107-
108-
109-
class DistanceBase(Func):
110-
output_field = FloatField()
111-
112-
def __init__(self, expression, vector, **extra):
113-
if not hasattr(vector, 'resolve_expression'):
114-
vector = Value(to_db(vector))
115-
super().__init__(expression, vector, **extra)
116-
117-
118-
class L2Distance(DistanceBase):
119-
function = ''
120-
arg_joiner = ' <-> '
121-
122-
123-
class MaxInnerProduct(DistanceBase):
124-
function = ''
125-
arg_joiner = ' <#> '
126-
127-
128-
class CosineDistance(DistanceBase):
129-
function = ''
130-
arg_joiner = ' <=> '
1+
from .extensions import VectorExtension
2+
from .functions import L2Distance, MaxInnerProduct, CosineDistance, L1Distance
3+
from .halfvec import HalfvecField
4+
from .indexes import IvfflatIndex, HnswIndex
5+
from .sparsevec import SparsevecField
6+
from .vector import VectorField
7+
from ..utils import SparseVec
8+
9+
__all__ = ['VectorExtension', 'VectorField', 'HalfvecField', 'SparsevecField', 'IvfflatIndex', 'HnswIndex', 'L2Distance', 'MaxInnerProduct', 'CosineDistance', 'L1Distance']

pgvector/django/extensions.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
from django.contrib.postgres.operations import CreateExtension
2+
3+
4+
class VectorExtension(CreateExtension):
5+
def __init__(self):
6+
self.name = 'vector'

pgvector/django/forms.py

Lines changed: 0 additions & 12 deletions
This file was deleted.

pgvector/django/functions.py

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,31 @@
1+
from django.db.models import FloatField, Func, Value
2+
from ..utils import Vector
3+
4+
5+
class DistanceBase(Func):
6+
output_field = FloatField()
7+
8+
def __init__(self, expression, vector, **extra):
9+
if not hasattr(vector, 'resolve_expression'):
10+
vector = Value(Vector.to_db(vector))
11+
super().__init__(expression, vector, **extra)
12+
13+
14+
class L2Distance(DistanceBase):
15+
function = ''
16+
arg_joiner = ' <-> '
17+
18+
19+
class MaxInnerProduct(DistanceBase):
20+
function = ''
21+
arg_joiner = ' <#> '
22+
23+
24+
class CosineDistance(DistanceBase):
25+
function = ''
26+
arg_joiner = ' <=> '
27+
28+
29+
class L1Distance(DistanceBase):
30+
function = ''
31+
arg_joiner = ' <+> '

pgvector/django/halfvec.py

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,35 @@
1+
from django.db.models import Field
2+
from ..utils import HalfVec
3+
4+
5+
# https://docs.djangoproject.com/en/4.2/howto/custom-model-fields/
6+
class HalfvecField(Field):
7+
description = 'Halfvec'
8+
empty_strings_allowed = False
9+
10+
def __init__(self, *args, dimensions=None, **kwargs):
11+
self.dimensions = dimensions
12+
super().__init__(*args, **kwargs)
13+
14+
def deconstruct(self):
15+
name, path, args, kwargs = super().deconstruct()
16+
if self.dimensions is not None:
17+
kwargs['dimensions'] = self.dimensions
18+
return name, path, args, kwargs
19+
20+
def db_type(self, connection):
21+
if self.dimensions is None:
22+
return 'halfvec'
23+
return 'halfvec(%d)' % self.dimensions
24+
25+
def from_db_value(self, value, expression, connection):
26+
return HalfVec.from_db(value)
27+
28+
def to_python(self, value):
29+
return HalfVec.from_db(value)
30+
31+
def get_prep_value(self, value):
32+
return HalfVec.to_db(value)
33+
34+
def value_to_string(self, obj):
35+
return self.get_prep_value(self.value_from_object(obj))

pgvector/django/indexes.py

Lines changed: 46 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,46 @@
1+
from django.contrib.postgres.indexes import PostgresIndex
2+
3+
4+
class IvfflatIndex(PostgresIndex):
5+
suffix = 'ivfflat'
6+
7+
def __init__(self, *expressions, lists=None, **kwargs):
8+
self.lists = lists
9+
super().__init__(*expressions, **kwargs)
10+
11+
def deconstruct(self):
12+
path, args, kwargs = super().deconstruct()
13+
if self.lists is not None:
14+
kwargs['lists'] = self.lists
15+
return path, args, kwargs
16+
17+
def get_with_params(self):
18+
with_params = []
19+
if self.lists is not None:
20+
with_params.append('lists = %d' % self.lists)
21+
return with_params
22+
23+
24+
class HnswIndex(PostgresIndex):
25+
suffix = 'hnsw'
26+
27+
def __init__(self, *expressions, m=None, ef_construction=None, **kwargs):
28+
self.m = m
29+
self.ef_construction = ef_construction
30+
super().__init__(*expressions, **kwargs)
31+
32+
def deconstruct(self):
33+
path, args, kwargs = super().deconstruct()
34+
if self.m is not None:
35+
kwargs['m'] = self.m
36+
if self.ef_construction is not None:
37+
kwargs['ef_construction'] = self.ef_construction
38+
return path, args, kwargs
39+
40+
def get_with_params(self):
41+
with_params = []
42+
if self.m is not None:
43+
with_params.append('m = %d' % self.m)
44+
if self.ef_construction is not None:
45+
with_params.append('ef_construction = %d' % self.ef_construction)
46+
return with_params

0 commit comments

Comments
 (0)