Commit 115c8d5

revert dense_flat changes

1 parent: e0919aa
File tree

2 files changed: +12, -18 lines

torch/csrc/cuda/comm.cpp

Lines changed: 8 additions & 2 deletions
@@ -48,9 +48,15 @@ std::vector<Tensor> broadcast(const Tensor& tensor, IntList devices) {
 {
 #endif
   auto & gpu_type = type.toBackend(type.is_sparse() ? at::kSparseCUDA : at::kCUDA);
-  for (auto device : devices) {
+  if (type.is_cuda()) {
+    tensors.push_back(tensor);
+  } else {
+    AutoGPU _gpu_guard(devices[0]);
+    tensors.push_back(gpu_type.copy(tensor, true));
+  }
+  for (auto device : devices.slice(1)) {
     AutoGPU _gpu_guard(device);
-    tensors.push_back(tensor.toType(gpu_type, true));
+    tensors.push_back(gpu_type.copy(tensor, true));
   }
   }
   return tensors;
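
For orientation, the hunk above restores a copy scheme that reuses the input when it already lives on a GPU, copies it to the first device otherwise, and then fans copies out to the remaining devices. Below is a minimal standalone sketch of that logic written against the public ATen API (at::Tensor::to) instead of the internal AutoGPU and gpu_type.copy helpers; the function name broadcast_sketch and the plain device-index vector are illustrative assumptions, not PyTorch's actual interface.

#include <ATen/ATen.h>
#include <vector>

// Sketch of the restored copy scheme (assumed names, public ATen API).
std::vector<at::Tensor> broadcast_sketch(const at::Tensor& src,
                                         const std::vector<c10::DeviceIndex>& devices) {
  std::vector<at::Tensor> out;
  out.reserve(devices.size());
  if (src.is_cuda()) {
    // Already resident on a GPU: reuse it rather than re-copying,
    // mirroring the new `if (type.is_cuda())` branch in the hunk.
    out.push_back(src);
  } else {
    // CPU input: materialize one copy on the first target device.
    out.push_back(src.to(at::Device(at::kCUDA, devices[0]), /*non_blocking=*/true));
  }
  // Every remaining device gets its own copy, as in `devices.slice(1)`.
  for (size_t i = 1; i < devices.size(); ++i) {
    out.push_back(src.to(at::Device(at::kCUDA, devices[i]), /*non_blocking=*/true));
  }
  return out;
}

Relative to the reverted loop, which copied to every device unconditionally, the new branch saves one copy when the source already sits on the first device.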

torch/csrc/utils/tensor_flatten.h

Lines changed: 4 additions & 16 deletions
@@ -9,22 +9,10 @@
 namespace torch { namespace utils {
 
 inline at::Tensor flatten_dense_tensors(at::TensorList tensors) {
-  if (tensors.size() == 1) {
-    return tensors[0].reshape({-1});
-  } else {
-    int64_t total_numel = 0;
-    for (const auto & tensor : tensors) {
-      total_numel += tensor.numel();
-    }
-    auto flat = tensors[0].type().tensor({total_numel});
-    int64_t offset = 0;
-    for (const auto & tensor : tensors) {
-      auto numel = tensor.numel();
-      flat.narrow(0, offset, numel).view_as(tensor).copy_(tensor);
-      offset += numel;
-    }
-    return flat;
-  }
+  static auto flatten = [](const at::Tensor &t) { return t.contiguous().view({-1}); };
+  if (tensors.size() == 1)
+    return flatten(tensors[0]);
+  return at::cat(fmap(tensors, flatten));
 }
 
 inline std::vector<at::Tensor> unflatten_dense_tensors(const at::Tensor& flat, at::TensorList tensors) {
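
The restored flatten_dense_tensors builds the flat buffer with a single at::cat over contiguous 1-D views; the reverted version preallocated the output and copied each tensor into a narrow()ed slice. The sketch below contrasts the two equivalent strategies using only public ATen calls: fmap is an internal torch helper, so a plain loop stands in for it, and at::empty with options() stands in for the old-style type().tensor() allocation. Both function names are illustrative.

#include <ATen/ATen.h>
#include <vector>

// Strategy this commit restores: flatten each tensor, then concatenate once.
at::Tensor flatten_via_cat(at::TensorList tensors) {
  std::vector<at::Tensor> flat;
  flat.reserve(tensors.size());
  for (const auto& t : tensors) {
    flat.push_back(t.contiguous().view({-1}));  // 1-D view of each input
  }
  return at::cat(flat);
}

// Strategy this commit reverts: preallocate, then copy slice by slice.
at::Tensor flatten_via_copy(at::TensorList tensors) {
  int64_t total_numel = 0;
  for (const auto& t : tensors) {
    total_numel += t.numel();
  }
  auto flat = at::empty({total_numel}, tensors[0].options());
  int64_t offset = 0;
  for (const auto& t : tensors) {
    auto numel = t.numel();
    // Reshape the slice to the source's shape so copy_ can run elementwise.
    flat.narrow(0, offset, numel).view_as(t).copy_(t);
    offset += numel;
  }
  return flat;
}

For dense inputs the two produce the same 1-D result; the cat-based form is shorter, while the copy-based form controls the output allocation explicitly.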
