Address precision matrix instability of MVN distribution (#21366)

fehiepsi · facebook-github-bot · commit f8cab38578a9 · 2019-06-06T13:54:46.000-07:00
Summary: Currently, when MVN is given a precision matrix, we invert it to obtain the covariance matrix and then take the Cholesky factor of that result. The inversion, however, can easily leave the computed covariance matrix numerically non-positive-definite, which triggers a Cholesky error. For example,
```
import torch
torch.manual_seed(0)
x = torch.randn(10)
P = torch.exp(-(x - x.unsqueeze(-1)) ** 2)
torch.distributions.MultivariateNormal(loc=torch.ones(10), precision_matrix=P)
```
triggers `RuntimeError: cholesky_cpu: U(8,8) is zero, singular U.` This PR uses a math trick ([ref](https://nbviewer.jupyter.org/gist/fehiepsi/5ef8e09e61604f10607380467eb82006#Precision-to-scale_tril)) so that only a triangular matrix has to be inverted, which improves numerical stability.
cc fritzo, neerajprad, SsnL
Pull Request resolved: #21366
Differential Revision: D15696972
Pulled By: ezyang
fbshipit-source-id: cec13f7dfdbd06dee94b8bed8ff0b3e720c7a188
diff --git a/test/test_distributions.py b/test/test_distributions.py
@@ -1781,6 +1781,11 @@ def gradcheck_func(samples, mu, sigma, prec, scale_tril):
         multivariate_normal_log_prob_gradcheck(mean, None, None, scale_tril)
         multivariate_normal_log_prob_gradcheck(mean_no_batch, None, None, scale_tril_batched)
 
+    def test_multivariate_normal_stable_with_precision_matrix(self):
+        x = torch.randn(10)
+        P = torch.exp(-(x - x.unsqueeze(-1)) ** 2)  # RBF kernel
+        MultivariateNormal(x.new_zeros(10), precision_matrix=P)
+
     @unittest.skipIf(not TEST_NUMPY, "Numpy not found")
     def test_multivariate_normal_log_prob(self):
         mean = torch.randn(3, requires_grad=True)
diff --git a/torch/distributions/multivariate_normal.py b/torch/distributions/multivariate_normal.py
@@ -66,6 +66,15 @@ def _batch_mahalanobis(bL, bx):
     return reshaped_M.reshape(bx_batch_shape)
 
 
+def _precision_to_scale_tril(P):
+    # Ref: https://nbviewer.jupyter.org/gist/fehiepsi/5ef8e09e61604f10607380467eb82006#Precision-to-scale_tril
+    Lf = torch.cholesky(torch.flip(P, (-2, -1)))
+    L_inv = torch.transpose(torch.flip(Lf, (-2, -1)), -2, -1)
+    L = torch.triangular_solve(torch.eye(P.shape[-1], dtype=P.dtype, device=P.device),
+                               L_inv, upper=False)[0]
+    return L
+
+
 class MultivariateNormal(Distribution):
     r"""
     Creates a multivariate normal (also called Gaussian) distribution
@@ -136,10 +145,10 @@ def __init__(self, loc, covariance_matrix=None, precision_matrix=None, scale_tri
 
         if scale_tril is not None:
             self._unbroadcasted_scale_tril = scale_tril
-        else:
-            if precision_matrix is not None:
-                self.covariance_matrix = torch.inverse(precision_matrix).expand_as(loc_)
-            self._unbroadcasted_scale_tril = torch.cholesky(self.covariance_matrix)
+        elif covariance_matrix is not None:
+            self._unbroadcasted_scale_tril = torch.cholesky(covariance_matrix)
+        else:  # precision_matrix is not None
+            self._unbroadcasted_scale_tril = _precision_to_scale_tril(precision_matrix)
 
     def expand(self, batch_shape, _instance=None):
         new = self._get_checked_instance(MultivariateNormal, _instance)