1 | 1 | #include <torch/csrc/python_headers.h> |
2 | 2 | #include <system_error> |
3 | 3 |
| 4 | +#include <ATen/ops/from_blob.h> |
4 | 5 | #include <c10/core/CPUAllocator.h> |
5 | 6 | #include <torch/csrc/THP.h> |
6 | 7 | #include <torch/csrc/serialization.h> |
@@ -228,32 +229,22 @@ void THPStorage_writeFileRaw( |
228 | 229 | // NOLINTNEXTLINE(cppcoreguidelines-init-variables) |
229 | 230 | uint8_t* data; |
230 | 231 | // NOLINTNEXTLINE(cppcoreguidelines-init-variables) |
231 | | - std::unique_ptr<char[]> cpu_data; |
| 232 | + at::Tensor cpu_tensor; |
232 | 233 | int64_t size_bytes = self->nbytes(); |
233 | 234 | int64_t numel = size_bytes / element_size; |
234 | 235 | if (self->device_type() == at::kCPU) { |
235 | 236 | data = self->data<uint8_t>(); |
236 | | -#if defined(USE_CUDA) && defined(TORCH_HIP_VERSION) && \ |
237 | | - (TORCH_HIP_VERSION >= 301) |
238 | | - } else if (self->device_type() == at::kCUDA) { |
239 | | - cpu_data = std::unique_ptr<char[]>(new char[size_bytes]); |
240 | | - data = (uint8_t*)cpu_data.get(); |
241 | | - C10_CUDA_CHECK(hipMemcpyWithStream( |
242 | | - data, |
243 | | - self->data<uint8_t>(), |
244 | | - size_bytes, |
245 | | - cudaMemcpyDeviceToHost, |
246 | | - c10::hip::getCurrentHIPStreamMasqueradingAsCUDA())); |
247 | | -#elif defined(USE_CUDA) |
248 | | - } else if (self->device_type() == at::kCUDA) { |
249 | | - cpu_data = std::unique_ptr<char[]>(new char[size_bytes]); |
250 | | - data = (uint8_t*)cpu_data.get(); |
251 | | - C10_CUDA_CHECK(cudaMemcpy( |
252 | | - data, self->data<uint8_t>(), size_bytes, cudaMemcpyDeviceToHost)); |
253 | | -#endif |
254 | 237 | } else { |
255 | | - TORCH_CHECK( |
256 | | - false, "writeFileRaw: Device not recognized: ", self->device_type()); |
| 238 | + // Here we use tensor.to() to implement the D2H copy for all non-CPU devices. |
| 239 | + auto device_tensor = at::from_blob( |
| 240 | + self->data<void>(), |
| 241 | + {size_bytes}, |
| 242 | + {1}, |
| 243 | + nullptr, |
| 244 | + at::device(self->device()).dtype(c10::kByte), |
| 245 | + {self->device()}); |
| 246 | + cpu_tensor = device_tensor.to(at::kCPU); |
| 247 | + data = (uint8_t*)cpu_tensor.data_ptr(); |
257 | 248 | } |
258 | 249 | if (save_size) { |
259 | 250 | if (torch::utils::THP_nativeByteOrder() == |
@@ -409,22 +400,19 @@ c10::intrusive_ptr<c10::StorageImpl> THPStorage_readFileRaw( |
409 | 400 | } |
410 | 401 | } |
411 | 402 |
412 | | -#if defined(USE_CUDA) && defined(TORCH_HIP_VERSION) && \ |
413 | | - (TORCH_HIP_VERSION >= 301) |
414 | | - if (storage->device_type() == at::kCUDA) { |
415 | | - C10_CUDA_CHECK(hipMemcpyWithStream( |
416 | | - storage->data<uint8_t>(), |
417 | | - data, |
418 | | - nbytes, |
419 | | - cudaMemcpyHostToDevice, |
420 | | - c10::hip::getCurrentHIPStreamMasqueradingAsCUDA())); |
421 | | - } |
422 | | -#elif defined(USE_CUDA) |
423 | | - if (storage->device_type() == at::kCUDA) { |
424 | | - C10_CUDA_CHECK(cudaMemcpy( |
425 | | - storage->data<uint8_t>(), data, nbytes, cudaMemcpyHostToDevice)); |
| 403 | + if (storage->device_type() != at::kCPU) { |
| 404 | + // Here we use tensor.copy_() to implement the H2D copy for all non-CPU devices. |
| 405 | + auto cpu_tensor = at::from_blob( |
| 406 | + (void*)data, {nbytes}, at::device(at::kCPU).dtype(c10::kByte)); |
| 407 | + auto device_tensor = at::from_blob( |
| 408 | + storage->data<void>(), |
| 409 | + {nbytes}, |
| 410 | + {1}, |
| 411 | + nullptr, |
| 412 | + at::device(storage->device()).dtype(c10::kByte), |
| 413 | + {storage->device()}); |
| 414 | + device_tensor.copy_(cpu_tensor); |
426 | 415 | } |
427 | | -#endif |
428 | 416 | return storage; |
429 | 417 | } |
430 | 418 |
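
The read-path hunk mirrors this in the host-to-device direction: the bytes just read from the file are viewed as a CPU byte tensor, the destination storage is viewed as a byte tensor on its own device, and Tensor::copy_() carries out the transfer. A hedged sketch of that direction, again with placeholder names `host_ptr`, `dst`, `nbytes`, and `dev`:

    // Sketch: copy `nbytes` raw bytes from a host buffer into device memory at `dst`.
    // `host_ptr`, `dst`, `nbytes`, and `dev` are illustrative placeholders.
    void h2d_copy(const void* host_ptr, void* dst, int64_t nbytes, c10::Device dev) {
      // Non-owning CPU view over the source bytes (from_blob takes a non-const pointer).
      auto host_view = at::from_blob(
          const_cast<void*>(host_ptr),
          {nbytes},
          at::device(at::kCPU).dtype(at::kByte));
      // Non-owning byte view over the destination memory on the target device.
      auto device_view = at::from_blob(
          dst,
          {nbytes},
          {1},
          nullptr,
          at::device(dev).dtype(at::kByte),
          {dev});
      // copy_() dispatches the host-to-device transfer to the device's backend.
      device_view.copy_(host_view);
    }
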