Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 25 additions & 3 deletions src/mygrad/linalg/funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def norm(
axis: Optional[Union[int, Tuple[int]]] = None,
keepdims: bool = False,
*,
nan_to_num: bool = True,
constant: Optional[bool] = None,
) -> Tensor:
r"""Vector norm.
Expand Down Expand Up @@ -53,6 +54,10 @@ def norm(
result as dimensions with size one. With this option the result will
broadcast correctly against the original `x`.

nan_to_num : bool, optional (default=True)
If `True` then gradients that would store nans due to the presence of
zeros in `x` will instead store zeros in those places.

constant : Optional[bool]
If ``True``, this tensor is treated as a constant, and thus does not
facilitate back propagation (i.e. ``constant.grad`` will always return
Expand Down Expand Up @@ -113,14 +118,26 @@ def norm(
>>> l2_norms
Tensor([3.74165739, 1. ])

The presence of the elementwise absolute values in the norm means that zero-valued
entries in a vectors have an undefined derivative.
The presence of the elementwise absolute values in the norm operation means that zero-valued entries in any of
the input vectors have an undefined derivative. When `nan_to_num=False` is specified these derivatives will be
reported as `nan`; otherwise they will be set to 0.0.

>>> l2_norms = mg.linalg.norm(x, axis=1, ord=2, nan_to_num=False)
>>> l2_norms.backward()
>>> x.grad
array([[0.26726124, 0.53452248, 0.80178373],
[1. , nan, nan]])

This is rigorously true, but is often not the desired behavior in autodiff applications.
Rather, it can be preferable to use `0.0` to fill these undefined derivatives.
This is the default behavior, i.e. what happens when `nan_to_num` is not specified.

>>> l2_norms = mg.linalg.norm(x, axis=1, ord=2, nan_to_num=True)  # default setting: `nan_to_num=True`
>>> l2_norms.backward()
>>> x.grad
array([[0.26726124, 0.53452248, 0.80178373],
[1. , 0., 0.]])

L1 norms along each of the three columns:

>>> mg.linalg.norm(x, axis=0, ord=1)
Expand All @@ -143,7 +160,12 @@ def norm(
return Tensor._op(
Norm,
x,
op_kwargs={"axis": axis, "keepdims": keepdims, "ord": ord},
op_kwargs={
"axis": axis,
"keepdims": keepdims,
"ord": ord,
"nan_to_num": nan_to_num,
},
constant=constant,
)

Expand Down
18 changes: 15 additions & 3 deletions src/mygrad/linalg/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,8 +257,17 @@ def _expand_dims(x, axis, original_ndmin):


class Norm(Operation):
def __call__(self, tensor, ord=None, axis=None, keepdims=False):
def __call__(
self,
tensor,
ord=None,
axis=None,
keepdims: bool = False,
*,
nan_to_num: bool = True
):
self.variables = (tensor,)
self._nan_to_num = nan_to_num
out = np.linalg.norm(tensor.data, ord=ord, axis=axis, keepdims=keepdims)

if isinstance(ord, Real) and np.isinf(ord): # pragma: no cover
Expand Down Expand Up @@ -304,7 +313,8 @@ def backward_var(self, grad: np.ndarray, index: int, **kwargs) -> np.ndarray:
# is broadcast-compatible with `tensor`
grad = _expand_dims(grad, axis=self.axis, original_ndmin=tensor.ndim)

invalid_derivative = np.where(x == 0)
if not self._nan_to_num:
invalid_derivative = np.where(x == 0)

if self.ord == 1:
out = np.sign(x)
Expand All @@ -327,5 +337,7 @@ def backward_var(self, grad: np.ndarray, index: int, **kwargs) -> np.ndarray:
out *= np.sign(x)
out *= _norm
out *= grad
out[invalid_derivative] = np.nan

if not self._nan_to_num:
out[invalid_derivative] = np.nan
return out
21 changes: 21 additions & 0 deletions src/mygrad/math/misc/funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def absolute(
where: Mask = True,
dtype: DTypeLikeReals = None,
constant: Optional[bool] = None,
nan_to_num: bool = True,
) -> Tensor: # pragma: no cover
"""The absolute value, computed elementwise.

Expand All @@ -57,6 +58,10 @@ def absolute(

Integer-type tensors must be constant.

nan_to_num : bool, optional (default=True)
If `True` then gradients that would store nans due to the presence of
zeros in `x` will instead store zeros in those places.

where : Mask
This condition is broadcast over the input. At locations where the
condition is True, the ``out`` tensor will be set to the ufunc result.
Expand Down Expand Up @@ -85,6 +90,22 @@ def absolute(
>>> mg.absolute([-1.2, 1.2])
Tensor([ 1.2, 1.2])

The absolute-value function is not differentiable at `x=0.0`.
By default the derivative at this point is treated as 0.

>>> x = mg.tensor([-2.0, 0.0, 2.0])
>>> mg.absolute(x).backward()
>>> x.grad
array([-1.,  0.,  1.])

However, a more rigorous behavior can be enabled such that the
undefined derivative will be returned as `nan`.

>>> x = mg.tensor([-2.0, 0.0, 2.0])
>>> mg.absolute(x, nan_to_num=False).backward()
>>> x.grad
array([-1., nan,  1.])

Plot the function and its derivative over ``[-10, 10]``:

.. plot::
Expand Down
11 changes: 9 additions & 2 deletions src/mygrad/math/misc/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,17 @@
class Abs(UnaryUfunc):
numpy_ufunc = np.absolute

def __call__(self, *args, nan_to_num: bool = True, **kwargs):
self._nan_to_num = nan_to_num
return super().__call__(*args, **kwargs)

def backward_var(self, grad, index, **kwargs):
(a,) = self.variables

return grad * np.piecewise(
a.data, [a.data < 0, a.data == 0, a.data > 0], [-1, np.nan, 1]
a.data,
[a.data < 0, a.data == 0, a.data > 0],
[-1, (0 if self._nan_to_num else np.nan), 1],
)


Expand Down Expand Up @@ -117,4 +124,4 @@ def backward_var(self, grad, index, **kwargs):
dfdx = a[:, np.newaxis] * np.expand_dims(grad, -2)
return dfdx
else: # pragma: no cover
raise ValueError()
raise ValueError()
23 changes: 9 additions & 14 deletions src/mygrad/tensor_creation/funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,14 +675,14 @@ def full_like(


def arange(
start: Real,
stop: Real = None,
step: int = None,
dtype: Optional[DTypeLikeReals] = None,
*,
*args,
constant: Optional[bool] = None,
**kwargs,
) -> Tensor:
"""Return a Tensor with evenly-spaced values within a given interval.
"""
arange([start,] stop[, step,], dtype=None, *, constant=None)

Return a Tensor with evenly-spaced values within a given interval.

Values are generated within [start, stop). Note that for non-integer steps, results may be
inconsistent; you are better off using `linspace` instead.
Expand Down Expand Up @@ -726,19 +726,14 @@ def arange(
>>> import mygrad as mg
>>> mg.arange(3)
Tensor([0, 1, 2])
>>> mg.arange(3.0, constant=True)
Tensor([ 0., 1., 2.]) # resulting tensor will not back-propagate a gradient
>>> mg.arange(3.0, constant=True) # resulting tensor will not back-propagate a gradient
Tensor([ 0., 1., 2.])
>>> mg.arange(3,7)
Tensor([3, 4, 5, 6])
>>> mg.arange(3,7,2)
Tensor([3, 5])
"""
if stop is None:
arr = np.arange(start, step=step, dtype=dtype)
else:
arr = np.arange(start, stop, step=step, dtype=dtype)

return Tensor(arr, constant=constant, copy=False)
return Tensor(np.arange(*args, **kwargs), constant=constant, copy=False)


def linspace(
Expand Down
5 changes: 3 additions & 2 deletions src/mygrad/ufuncs/_ufunc_creators.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ def __call__(
where: Mask = True,
dtype: DTypeLikeReals = None,
constant: Optional[bool] = None,
**kwargs,
) -> Tensor:
# it is fastest to check if out is None, which is likely the
# most common scenario, and this is a very "hot path" in the
Expand All @@ -191,15 +192,15 @@ def __call__(
out._in_place_op(
cls._wrapped_op,
x,
op_kwargs={"where": where, "dtype": dtype},
op_kwargs={"where": where, "dtype": dtype, **kwargs},
constant=constant,
)
return out
else:
return Tensor._op(
cls._wrapped_op,
x,
op_kwargs={"where": where, "dtype": dtype},
op_kwargs={"where": where, "dtype": dtype, **kwargs},
constant=constant,
out=out,
)
Expand Down
14 changes: 14 additions & 0 deletions tests/linalg/test_norm.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,17 @@ def test_norm_backward_1d(x, data, ord):

assert_allclose(o1, o2)
assert_allclose(t1.grad, t2.grad, atol=1e-7, rtol=1e-7)


def test_nan_to_num_behavior():
    """`norm` reports nan-gradients only when `nan_to_num=False` is requested."""
    strict = mg.tensor([[1.0, 2.0, 3.0], [1.0, 0.0, 0.0]])
    zero_filled = strict.copy()
    default = strict.copy()

    # One backward pass per configuration; `default` omits the flag entirely
    # and is expected to match the explicit `nan_to_num=True` result.
    mg.linalg.norm(strict, axis=1, nan_to_num=False).backward()
    mg.linalg.norm(zero_filled, axis=1, nan_to_num=True).backward()
    mg.linalg.norm(default, axis=1).backward()

    # The second row holds two zeros, each of which has an undefined derivative.
    nan_count = np.isnan(strict.grad).sum()
    assert nan_count == 2
    assert_allclose(np.nan_to_num(strict.grad), zero_filled.grad)
    assert_allclose(default.grad, zero_filled.grad)
14 changes: 14 additions & 0 deletions tests/ufuncs/test_fwd_prop_and_backprop.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,3 +331,17 @@ def test_arctan2_bkwd_pos_x():
)
def test_arctan2_bkwd_neg_x():
pass


def test_abs_nan_to_num():
    """`abs` reports a nan-gradient at zero only when `nan_to_num=False` is requested."""
    strict = mg.arange(-2.0, 3.0)
    zero_filled = strict.copy()
    default = strict.copy()

    # One backward pass per configuration; `default` omits the flag entirely
    # and is expected to match the explicit `nan_to_num=True` result.
    mg.abs(strict, nan_to_num=False).backward()
    mg.abs(zero_filled, nan_to_num=True).backward()
    mg.abs(default).backward()

    # Only the middle entry is expected to carry a nan gradient.
    expected_nan_mask = np.array([False, False, True, False, False])
    assert np.all(np.isnan(strict.grad) == expected_nan_mask)
    assert_allclose(np.nan_to_num(strict.grad), zero_filled.grad)
    assert_allclose(zero_filled.grad, default.grad)