Update on "[15/N] Add allreduce_coalesced custom op with CPU/CUDA implementations"

H-Huang · H-Huang · commit af4b4060f645 · 2022-11-11T08:21:48.000-08:00
[ghstack-poisoned]
diff --git a/torch/csrc/distributed/c10d/OpsImpl.cpp b/torch/csrc/distributed/c10d/OpsImpl.cpp
@@ -394,14 +394,13 @@ TORCH_LIBRARY_IMPL(c10d, CUDA, m) {
 }
 
 TORCH_LIBRARY_IMPL(c10d, CPU, m) {
-  m.impl("allreduce_coalesced", allreduce_coalesced_cpu_);
+  m.impl("allreduce_coalesced_", allreduce_coalesced_cpu_);
 }
 
 TORCH_LIBRARY_IMPL(c10d, CUDA, m) {
-  m.impl("allreduce_coalesced", allreduce_coalesced_cuda_);
+  m.impl("allreduce_coalesced_", allreduce_coalesced_cuda_);
 }
 
-
 TORCH_LIBRARY_IMPL(c10d, CPU, m) {
   m.impl("allgather_", allgather_cpu_);
 }

Original file line number	Diff line number	Diff line change
`@@ -394,14 +394,13 @@ TORCH_LIBRARY_IMPL(c10d, CUDA, m) {`
`394`	`394`	`}`
`395`	`395`
`396`	`396`	`TORCH_LIBRARY_IMPL(c10d, CPU, m) {`
`397`		`- m.impl("allreduce_coalesced", allreduce_coalesced_cpu_);`
	`397`	`+ m.impl("allreduce_coalesced_", allreduce_coalesced_cpu_);`
`398`	`398`	`}`
`399`	`399`
`400`	`400`	`TORCH_LIBRARY_IMPL(c10d, CUDA, m) {`
`401`		`- m.impl("allreduce_coalesced", allreduce_coalesced_cuda_);`
	`401`	`+ m.impl("allreduce_coalesced_", allreduce_coalesced_cuda_);`
`402`	`402`	`}`
`403`	`403`
`404`		`-`
`405`	`404`	`TORCH_LIBRARY_IMPL(c10d, CPU, m) {`
`406`	`405`	`m.impl("allgather_", allgather_cpu_);`
`407`	`406`	`}`