pytorch · colesbury · May 10, 2018 · May 10, 2018
diff --git a/test/test_cuda.py b/test/test_cuda.py
@@ -817,6 +817,8 @@ def test_type_conversions(self):
     def test_type_conversions_same_gpu(self):
         x = torch.randn(5, 5).cuda(1)
         self.assertEqual(x.int().get_device(), 1)
+        self.assertEqual(x.type(torch.int).get_device(), 1)
+        self.assertEqual(x.to(torch.int).get_device(), 1)
 
     def test_neg(self):
         TestTorch._test_neg(self, lambda t: t.cuda())

diff --git a/tools/autograd/templates/python_variable_methods.cpp b/tools/autograd/templates/python_variable_methods.cpp
@@ -628,7 +628,7 @@ static PyObject * THPVariable_type(PyObject* self, PyObject* args, PyObject* kwa
   auto self_device_type = torch::getDeviceType(self_.type());
   auto& type = is_dtype ? torch::getType(r.scalartype(0), *torch::getLayout(self_.type().backend()), self_device_type) :
                           torch::utils::type_from_string(type_name);
-  return THPVariable_Wrap(torch::utils::dispatch_type_conversion(self_, type, -1, r.toBool(1)));
+  return THPVariable_Wrap(torch::utils::dispatch_type_conversion(self_, type, at::nullopt, r.toBool(1)));
   END_HANDLE_TH_ERRORS
 }
 

diff --git a/torch/csrc/utils/tensor_conversion_dispatch.cpp b/torch/csrc/utils/tensor_conversion_dispatch.cpp
@@ -8,19 +8,19 @@
 
 namespace torch { namespace utils {
 
-at::Tensor dispatch_type_conversion(const at::Tensor & self, const at::Type & type) {
-  int64_t device = self.is_cuda() ? self.get_device() : -1;
-  return dispatch_type_conversion(self, type, device, false);
-}
-
-at::Tensor dispatch_type_conversion(const at::Tensor & self, const at::Type & type,
-                                    int device, bool non_blocking) {
+at::Tensor dispatch_type_conversion(
+    const at::Tensor & self,
+    const at::Type & type,
+    at::optional<int> device,
+    bool non_blocking) {
   if (type.is_cuda()) {
     torch::utils::cuda_lazy_init();
   }
   AutoNoGIL no_gil;
-  AutoGPU auto_gpu(device);
+
   int64_t tensor_device = self.is_cuda() ? self.get_device() : -1;
+  AutoGPU auto_gpu(device.value_or(tensor_device));
+
   if (self.is_cuda() && type.is_cuda() && tensor_device != at::current_device()) {
     // copy if the devices are different even if the types are the same
     return type.copy(self, non_blocking);

diff --git a/torch/csrc/utils/tensor_conversion_dispatch.h b/torch/csrc/utils/tensor_conversion_dispatch.h
@@ -6,8 +6,24 @@
 
 namespace torch { namespace utils {
 
-at::Tensor dispatch_type_conversion(const at::Tensor & self, const at::Type & type);
-at::Tensor dispatch_type_conversion(const at::Tensor & self, const at::Type & type,
-                                    int device, bool non_blocking);
+// Returns a tensor with the same data as `self` and the specified type and
+// device. Returns `self` unmodified if neither the type nor device change;
+// otherwise a copy is made.
+//
+// The `device` argument is only relevant if `type` is a CUDA type. There are
+// a few special cases for device:
+//
+//  - if device is -1 then the returned tensor will be on the current device
+//  - if device is nullopt then the returned tensor will be on the same device
+//    as `self` if possible; otherwise it will be on the current device.
+//
+// If `non_blocking` is true, then the copy may be performed asynchronously
+// w.r.t. the host if `self` is a CPU tensor in pinned memory and `type` is a
+// CUDA type. Note that copies between CUDA devices are always asynchronous
+// w.r.t. the host.
+at::Tensor dispatch_type_conversion(const at::Tensor & self,
+                                    const at::Type & type,
+                                    at::optional<int> device=at::nullopt,
+                                    bool non_blocking=false);
 
 }} // namespace torch::utils