-
Notifications
You must be signed in to change notification settings - Fork 237
Expand file tree
/
Copy pathnumpy.py
More file actions
90 lines (71 loc) · 2.91 KB
/
numpy.py
File metadata and controls
90 lines (71 loc) · 2.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
from typing import TYPE_CHECKING
import numpy as np
if TYPE_CHECKING: # pragma: no cover
from docarray.typing import ArrayType
def cosine(x_mat: 'np.ndarray', y_mat: 'np.ndarray', eps: float = 1e-7) -> 'np.ndarray':
    """Pairwise cosine distance between the rows of x_mat and the rows of y_mat.

    The distance is ``1 - similarity``, with the similarity clipped to ``[-1, 1]``.
    ``eps`` is added to both the dot products and the norm products, so a pair
    involving an all-zero row evaluates to ``eps / eps`` (similarity 1, distance 0)
    instead of ``0 / 0``.

    :param x_mat: np.ndarray with ndim=2
    :param y_mat: np.ndarray with ndim=2
    :param eps: a small jitter to avoid divide by zero
    :return: np.ndarray with ndim=2
    """
    x_norms = np.linalg.norm(x_mat, axis=1)
    y_norms = np.linalg.norm(y_mat, axis=1)
    # entry (i, j) is |x_i| * |y_j|, the denominator of the cosine similarity
    norm_products = np.outer(x_norms, y_norms) + eps
    similarity = (np.dot(x_mat, y_mat.T) + eps) / norm_products
    return 1 - np.clip(similarity, -1, 1)
def sqeuclidean(x_mat: 'np.ndarray', y_mat: 'np.ndarray') -> 'np.ndarray':
    """Squared Euclidean distance between each row in x_mat and each row in y_mat.

    Computed with the expansion ``|x - y|^2 = |x|^2 + |y|^2 - 2 * x.y`` so the
    whole distance matrix comes from a single matrix product.

    :param x_mat: np.ndarray with ndim=2
    :param y_mat: np.ndarray with ndim=2
    :return: np.ndarray with ndim=2, entry (i, j) is the squared distance between
        row i of x_mat and row j of y_mat; never negative
    """
    # column vector of |x_i|^2 so it broadcasts against the row vector of |y_j|^2
    x_sq = np.sum(x_mat**2, axis=1)[:, np.newaxis]
    y_sq = np.sum(y_mat**2, axis=1)
    sq_dist = y_sq + x_sq - 2 * np.dot(x_mat, y_mat.T)
    # The expanded form can round to a tiny negative number for (near-)identical
    # rows; clamp so callers taking a square root do not get NaN.
    return np.maximum(sq_dist, 0)
def sparse_cosine(x_mat: 'ArrayType', y_mat: 'ArrayType') -> 'np.ndarray':
    """Pairwise cosine distance between the rows of two sparse matrices.

    The distance is ``1 - similarity``, with the similarity clipped to ``[-1, 1]``.
    No jitter is added to the denominator, so an all-zero row divides by zero.

    :param x_mat: scipy.sparse like array with ndim=2
    :param y_mat: scipy.sparse like array with ndim=2
    :return: np.ndarray with ndim=2
    """
    from scipy.sparse.linalg import norm

    # entry (i, j) is |x_i| * |y_j|
    norm_products = np.outer(norm(x_mat, axis=1), norm(y_mat, axis=1))
    # np.asarray is required: the division may hand back a np.matrix object,
    # which iterates differently from a plain ndarray
    similarity = np.asarray(x_mat.dot(y_mat.T) / norm_products)
    return 1 - np.clip(similarity, -1, 1)
def sparse_sqeuclidean(x_mat: 'ArrayType', y_mat: 'ArrayType') -> 'np.ndarray':
    """Squared Euclidean distance between each row in x_mat and each row in y_mat.

    Computed with the expansion ``|x - y|^2 = |x|^2 + |y|^2 - 2 * x.y`` so the
    inputs are never densified individually.

    :param x_mat: scipy.sparse like array with ndim=2
    :param y_mat: scipy.sparse like array with ndim=2
    :return: np.ndarray with ndim=2, entry (i, j) is the squared distance between
        row i of x_mat and row j of y_mat; never negative
    """
    # Row-wise squared norms. Going through np.asarray + reshape gives an explicit
    # column / row vector whether .sum() returned a 2-D np.matrix or a 1-D array.
    x_sq = np.asarray(x_mat.power(2).sum(axis=1)).reshape(-1, 1)
    y_sq = np.asarray(y_mat.power(2).sum(axis=1)).reshape(1, -1)
    # we need the np.asarray otherwise we may get a np.matrix object that
    # iterates differently
    sq_dist = np.asarray(y_sq + x_sq - 2 * x_mat.dot(y_mat.T))
    # The expanded form can round to a tiny negative number for (near-)identical
    # rows; clamp so callers taking a square root do not get NaN.
    return np.maximum(sq_dist, 0)
def sparse_euclidean(x_mat: 'ArrayType', y_mat: 'ArrayType') -> 'np.ndarray':
    """Sparse euclidean distance between each row in x_mat and each row in y_mat.

    This is the element-wise square root of :func:`sparse_sqeuclidean`.

    :param x_mat: scipy.sparse like array with ndim=2
    :param y_mat: scipy.sparse like array with ndim=2
    :return: np.ndarray with ndim=2
    """
    sq_dist = sparse_sqeuclidean(x_mat, y_mat)
    # Floating-point cancellation in the squared distance can leave tiny negative
    # values for (near-)identical rows; clamp them so the root is 0, not NaN.
    return np.sqrt(np.maximum(sq_dist, 0))
def euclidean(x_mat: 'ArrayType', y_mat: 'ArrayType') -> 'np.ndarray':
    """Euclidean distance between each row in x_mat and each row in y_mat.

    This is the element-wise square root of :func:`sqeuclidean`, so it takes the
    same dense inputs as that function.

    :param x_mat: np.ndarray with ndim=2
    :param y_mat: np.ndarray with ndim=2
    :return: np.ndarray with ndim=2
    """
    sq_dist = sqeuclidean(x_mat, y_mat)
    # Floating-point cancellation in the squared distance can leave tiny negative
    # values for (near-)identical rows; clamp them so the root is 0, not NaN.
    return np.sqrt(np.maximum(sq_dist, 0))