forked from pgvector/pgvector-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsparsevec.py
More file actions
74 lines (59 loc) · 2.28 KB
/
Copy pathsparsevec.py
File metadata and controls
74 lines (59 loc) · 2.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import numpy as np
from struct import pack, unpack_from
def to_db_value(value):
    """Coerce ``value`` to a ``SparseVector``.

    ``SparseVector`` instances are returned unchanged. Lists and NumPy arrays
    are treated as dense vectors and converted with
    ``SparseVector.from_dense``.

    Args:
        value: A ``SparseVector``, a list, or a ``numpy.ndarray``.

    Returns:
        The ``SparseVector`` for ``value``.

    Raises:
        ValueError: If ``value`` is none of the accepted types.
    """
    # Already in the target representation: hand it back untouched.
    if isinstance(value, SparseVector):
        return value

    if not isinstance(value, (list, np.ndarray)):
        raise ValueError('expected sparsevec')

    return SparseVector.from_dense(value)
class SparseVector:
def __init__(self, dim, indices, values):
self.dim = dim
self.indices = indices
self.values = values
def from_dense(value):
if isinstance(value, np.ndarray):
value = value.tolist()
dim = len(value)
indices = [i for i, v in enumerate(value) if v != 0]
values = [value[i] for i in indices]
return SparseVector(dim, indices, values)
def to_dense(self):
vec = [0] * self.dim
for i, v in zip(self.indices, self.values):
vec[i] = v
return vec
def to_db(value, dim=None):
if value is None:
return value
value = to_db_value(value)
if dim is not None and value.dim != dim:
raise ValueError('expected %d dimensions, not %d' % (dim, value.dim))
return '{' + ','.join([f'{i + 1}:{v}' for i, v in zip(value.indices, value.values)]) + '}/' + str(value.dim)
def to_db_binary(value):
if value is None:
return value
value = to_db_value(value)
nnz = len(value.indices)
return pack(f'>iii{nnz}i{nnz}f', value.dim, nnz, 0, *value.indices, *value.values)
def from_db(value):
if value is None or isinstance(value, SparseVector):
return value
elements, dim = value.split('/')
indices = []
values = []
for e in elements[1:-1].split(','):
i, v = e.split(':')
indices.append(int(i) - 1)
values.append(float(v))
return SparseVector(int(dim), indices, values)
def from_db_binary(value):
if value is None or isinstance(value, SparseVector):
return value
dim, nnz, unused = unpack_from('>iii', value)
indices = unpack_from(f'>{nnz}i', value, 12)
values = unpack_from(f'>{nnz}f', value, 12 + nnz * 4)
return SparseVector(int(dim), indices, values)
def __repr__(self):
return f'SparseVector({self.dim}, {self.indices}, {self.values})'