59 changes: 59 additions & 0 deletions aten/src/ATen/native/Unique.cpp
@@ -0,0 +1,59 @@
// Returns unique elements of input tensor.

#include "ATen/ATen.h"
#include "ATen/Dispatch.h"

#include <set>
#include <tuple>
#include <unordered_map>
#include <unordered_set>

namespace at {
namespace native{

namespace {

template <template <class...> class set_type, typename scalar_t>
std::tuple<Tensor, Tensor> _unique_cpu_template(
const Tensor& self,
const bool return_inverse) {
const Tensor& input = self.contiguous();
const scalar_t* input_data = input.data<scalar_t>();
set_type<scalar_t> set(input_data, input_data + input.numel());
Tensor output = input.type().tensor({static_cast<int64_t>(set.size())});
scalar_t* output_data = output.data<scalar_t>();
std::copy(set.begin(), set.end(), output_data);

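// inverse_indices maps each input element to the index of its value in the
// output; it stays empty unless return_inverse is set.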
Tensor inverse_indices = self.type().toScalarType(kLong).tensor({0});
if (return_inverse) {
inverse_indices.resize_(input.sizes());
int64_t* inverse_indices_data = inverse_indices.data<int64_t>();
std::unordered_map<scalar_t, int64_t> inverse_map;
inverse_map.reserve(output.numel());
for (int i = 0; i < output.numel(); ++i) {
inverse_map[output_data[i]] = i;
}
for (int i = 0; i < input.numel(); ++i) {
inverse_indices_data[i] = inverse_map[input_data[i]];
}
}
return std::make_tuple(output, inverse_indices);
}
} // namespace

std::tuple<Tensor, Tensor>
_unique_cpu(const Tensor& self, const bool sorted, const bool return_inverse) {
if (sorted) {
return AT_DISPATCH_ALL_TYPES(self.type(), "unique", [&] {
return _unique_cpu_template<std::set, scalar_t>(self, return_inverse);
});
} else {
return AT_DISPATCH_ALL_TYPES(self.type(), "unique", [&] {
return _unique_cpu_template<std::unordered_set, scalar_t>(
self, return_inverse);
});
}
}

} // namespace native
} // namespace at
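For reference, a minimal pure-Python sketch of the same algorithm (illustrative only, not part of this change): a set collapses duplicates (ordered for std::set, unordered for std::unordered_set), and, when requested, a value-to-position map translates every input element into its index in the output.

def unique_reference(values, sort=False, return_inverse=False):
    # Pure-Python mirror of _unique_cpu_template (hypothetical helper).
    uniques = list(set(values))        # collapse duplicates (unordered)
    if sort:
        uniques.sort()                 # std::set case: ascending order
    if not return_inverse:
        return uniques, []
    position = {v: i for i, v in enumerate(uniques)}  # value -> output index
    inverse = [position[v] for v in values]           # one index per input element
    return uniques, inverse

# unique_reference([1, 3, 2, 3], sort=True, return_inverse=True)
# -> ([1, 2, 3], [0, 2, 1, 2])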
16 changes: 16 additions & 0 deletions aten/src/ATen/native/cuda/Unique.cu
@@ -0,0 +1,16 @@
#include "ATen/ATen.h"

#include <tuple>

namespace at {
namespace native{

std::tuple<Tensor, Tensor>
_unique_cuda(const Tensor& self, const bool sorted, const bool return_inverse) {
throw std::runtime_error(
"unique is currently CPU-only, and lacks CUDA support. "
"Pull requests welcome!");
}

} // namespace native
} // namespace at
5 changes: 5 additions & 0 deletions aten/src/ATen/native/native_functions.yaml
@@ -363,6 +363,11 @@
- func: type_as(Tensor self, Tensor other) -> Tensor
variants: method

- func: _unique(Tensor self, bool sorted=false, bool return_inverse=false) -> (Tensor, Tensor)
dispatch:
CPU: _unique_cpu
CUDA: _unique_cuda

- func: _unsafe_view(Tensor self, IntList size) -> Tensor
variants: function

1 change: 1 addition & 0 deletions docs/source/tensors.rst
@@ -311,6 +311,7 @@ view of a storage and defines numeric operations on it.
.. automethod:: type_as
.. automethod:: unfold
.. automethod:: uniform_
.. automethod:: unique
.. automethod:: unsqueeze
.. automethod:: unsqueeze_
.. automethod:: var
1 change: 1 addition & 0 deletions docs/source/torch.rst
@@ -143,6 +143,7 @@ Reduction Ops
.. autofunction:: prod
.. autofunction:: std
.. autofunction:: sum
.. autofunction:: unique
.. autofunction:: var


61 changes: 61 additions & 0 deletions test/test_torch.py
@@ -5422,6 +5422,67 @@ def test_set_flush_denormal(self):
self.assertEqual(double_tensor[2], 0.0, prec=0.0) # tiny_double to zero
torch.set_flush_denormal(False)

def test_unique_cpu(self):
x = torch.LongTensor([1, 2, 3, 2, 8, 5, 2, 3])
expected_unique = torch.LongTensor([1, 2, 3, 5, 8])
expected_inverse = torch.LongTensor([0, 1, 2, 1, 4, 3, 1, 2])

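# Without sorted=True the order of the unique elements is unspecified,
# so compare sorted lists.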
x_unique = torch.unique(x)
self.assertEqual(
expected_unique.tolist(), sorted(x_unique.tolist()))

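# The unspecified output order also makes the exact inverse indices
# unspecified, so only their count is checked here.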
x_unique, x_inverse = x.unique(return_inverse=True)
self.assertEqual(
expected_unique.tolist(), sorted(x_unique.tolist()))
self.assertEqual(expected_inverse.numel(), x_inverse.numel())

x_unique = x.unique(sorted=True)
self.assertEqual(expected_unique, x_unique)

x_unique, x_inverse = torch.unique(
x, sorted=True, return_inverse=True)
self.assertEqual(expected_unique, x_unique)
self.assertEqual(expected_inverse, x_inverse)

# Tests per-element unique on a higher rank tensor.
y = x.view(2, 2, 2)
y_unique, y_inverse = y.unique(sorted=True, return_inverse=True)
self.assertEqual(expected_unique, y_unique)
self.assertEqual(expected_inverse.view(y.size()), y_inverse)

# Tests unique on other types.
int_unique, int_inverse = torch.unique(
torch.IntTensor([2, 1, 2]), sorted=True, return_inverse=True)
self.assertEqual(torch.IntTensor([1, 2]), int_unique)
self.assertEqual(torch.LongTensor([1, 0, 1]), int_inverse)

double_unique, double_inverse = torch.unique(
torch.DoubleTensor([2., 1.5, 2.1, 2.]),
sorted=True,
return_inverse=True,
)
self.assertEqual(torch.DoubleTensor([1.5, 2., 2.1]), double_unique)
self.assertEqual(torch.LongTensor([1, 0, 2, 1]), double_inverse)

byte_unique, byte_inverse = torch.unique(
torch.ByteTensor([133, 7, 7, 7, 42, 128]),
sorted=True,
return_inverse=True,
)
self.assertEqual(torch.ByteTensor([7, 42, 128, 133]), byte_unique)
self.assertEqual(torch.LongTensor([3, 0, 0, 0, 1, 2]), byte_inverse)

@unittest.skipIf(not torch.cuda.is_available(), 'no CUDA')
def test_unique_cuda(self):
# unique currently does not support CUDA.
self.assertRaises(
RuntimeError, lambda: torch.cuda.LongTensor([0, 1]).unique())
self.assertRaises(
RuntimeError,
lambda: torch.unique(torch.cuda.FloatTensor([0., 1.])),
)


# Functions to test negative dimension wrapping
METHOD = 1
INPLACE_METHOD = 2
3 changes: 3 additions & 0 deletions tools/autograd/derivatives.yaml
@@ -637,6 +637,9 @@
- name: uniform_(Tensor self, double from, double to, Generator generator)
self: zeros_like(grad)

- name: _unique(Tensor self, bool sorted, bool return_inverse)
self: not_implemented("_unique")

- name: _unsafe_view(Tensor self, IntList size)
self: grad.contiguous().view(self.sizes())

12 changes: 12 additions & 0 deletions torch/autograd/variable.py
@@ -310,6 +310,18 @@ def masked_fill(self, mask, value):
def expand_as(self, tensor):
return self.expand(tensor.size())

def unique(self, sorted=False, return_inverse=False):
r"""Returns the unique scalar elements of the tensor as a 1-D tensor.

See :func:`torch.unique`
"""
output, inverse_indices = self._unique(
sorted=sorted, return_inverse=return_inverse)
if return_inverse:
return output, inverse_indices
else:
return output

def __rsub__(self, other):
return -self + other

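A minimal usage sketch for the new method (assumes a Variable input, mirroring the wrapper above): without return_inverse a single tensor comes back, with it a pair.

import torch
from torch.autograd import Variable

v = Variable(torch.LongTensor([1, 3, 2, 3]))
out = v.unique(sorted=True)                             # single tensor
out, inv = v.unique(sorted=True, return_inverse=True)   # (output, inverse_indices)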
90 changes: 88 additions & 2 deletions torch/functional.py
@@ -4,8 +4,21 @@
import math

__all__ = [
'split', 'chunk', 'empty_like', 'stack', 'unbind', 'btriunpack', 'matmul', 'det', 'stft',
'hann_window', 'hamming_window', 'bartlett_window', 'where', 'isnan'
'bartlett_window',
'btriunpack',
'chunk',
'det',
'empty_like',
'hamming_window',
'hann_window',
'isnan',
'matmul',
'split',
'stack',
'stft',
'unbind',
'unique',
'where',
]


@@ -536,3 +549,76 @@ def isnan(tensor):
if not torch.is_tensor(tensor):
raise ValueError("The argument is not a tensor")
return tensor != tensor


def unique(input, sorted=False, return_inverse=False):
r"""Returns the unique scalar elements of the input tensor as a 1-D tensor.

Arguments:
input (Tensor): the input tensor
sorted (bool): Whether to sort the unique elements in ascending order
before returning as output.
return_inverse (bool): Whether to also return, for each element of
the original input, the index of its value in the returned unique list.

Returns:
(Tensor, Tensor (optional)): A tensor or a tuple of tensors containing

- **output** (*Tensor*): the output list of unique scalar elements.
- **inverse_indices** (*Tensor*): (optional) only returned when
:attr:`return_inverse` is True; a tensor with the same shape as the
input giving, for each input element, the index of its value in the
output. If :attr:`return_inverse` is False, only a single tensor is
returned.

Example::

>>> output = torch.unique(torch.LongTensor([1, 3, 2, 3]))
>>> output

2
3
1
[torch.LongTensor of size (3,)]

>>> output, inverse_indices = torch.unique(
        torch.LongTensor([1, 3, 2, 3]), sorted=True, return_inverse=True)
>>> output

1
2
3
[torch.LongTensor of size (3,)]

>>> inverse_indices

0
2
1
2
[torch.LongTensor of size (4,)]

>>> output, inverse_indices = torch.unique(
        torch.LongTensor([[1, 3], [2, 3]]), sorted=True, return_inverse=True)
>>> output

1
2
3
[torch.LongTensor of size (3,)]

>>> inverse_indices

0 2
1 2
[torch.LongTensor of size (2,2)]
"""
output, inverse_indices = torch._C._VariableBase._unique(
input,
sorted=sorted,
return_inverse=return_inverse,
)
if return_inverse:
return output, inverse_indices
else:
return output
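

A usage sketch worth noting (not part of this change, and assuming integer-tensor indexing of a 1-D tensor as exercised elsewhere in the test suite): with return_inverse=True, indexing the output with the inverse indices reconstructs the original input, which is exactly the relationship the test expectations above encode.

import torch

x = torch.LongTensor([1, 2, 3, 2, 8, 5, 2, 3])
output, inverse_indices = torch.unique(x, sorted=True, return_inverse=True)
# Each input element is recovered by looking up its unique value.
assert torch.equal(output[inverse_indices], x)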