Skip to content

Commit abd3b44

Browse files
committed
Remove ProcessGroupRoundRobin
ghstack-source-id: 4e095a2
Pull Request resolved: #87088
1 parent cd9f451 commit abd3b44

File tree

7 files changed

+0
-322
lines changed

7 files changed

+0
-322
lines changed

build_variables.bzl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,6 @@ libtorch_distributed_extra_sources = [
510510
"torch/csrc/distributed/autograd/rpc_messages/rref_backward_req.cpp",
511511
"torch/csrc/distributed/autograd/rpc_messages/rref_backward_resp.cpp",
512512
"torch/csrc/distributed/c10d/HashStore.cpp",
513-
"torch/csrc/distributed/c10d/ProcessGroupRoundRobin.cpp",
514513
"torch/csrc/distributed/rpc/agent_utils.cpp",
515514
"torch/csrc/distributed/rpc/message.cpp",
516515
"torch/csrc/distributed/rpc/profiler/remote_profiler_manager.cpp",

test/distributed/test_c10d_gloo.py

Lines changed: 0 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
requires_gloo,
3737
skip_if_lt_x_gpu,
3838
simple_sparse_reduce_tests,
39-
skip_if_win32,
4039
create_device,
4140
verify_ddp_error_logged,
4241
)
@@ -1415,57 +1414,6 @@ def test_barrier_implies_wait(self):
14151414
for i, tensor in enumerate(tensors):
14161415
self.assertEqual(torch.full(size, float(i * self.world_size)), tensor)
14171416

1418-
@skip_if_win32()
1419-
@requires_gloo()
1420-
def test_round_robin(self):
1421-
num_process_groups = 2
1422-
store = c10d.FileStore(self.file_name, self.world_size)
1423-
pg = c10d._round_robin_process_groups(
1424-
[
1425-
c10d.ProcessGroupGloo(
1426-
c10d.PrefixStore(str(i), store),
1427-
self.rank,
1428-
self.world_size,
1429-
self.opts(),
1430-
)
1431-
for i in range(num_process_groups)
1432-
]
1433-
)
1434-
1435-
# Run a few collectives so that we have called each process group
1436-
for _ in range(num_process_groups + 1):
1437-
tensor = torch.full([100, 100], float(self.rank))
1438-
pg.broadcast(tensor, root=0).wait()
1439-
self.assertEqual(torch.full([100, 100], 0.0), tensor)
1440-
1441-
@skip_if_win32()
1442-
@requires_gloo()
1443-
def test_round_robin_create_destroy(self):
1444-
store = c10d.FileStore(self.file_name, self.world_size)
1445-
1446-
def create(num, prefix):
1447-
return c10d._round_robin_process_groups(
1448-
[
1449-
c10d.ProcessGroupGloo(
1450-
c10d.PrefixStore("%s/%d" % (prefix, i), store),
1451-
self.rank,
1452-
self.world_size,
1453-
self.opts(),
1454-
)
1455-
for i in range(num)
1456-
]
1457-
)
1458-
1459-
# Run create/use/destroy twice
1460-
for i in range(2):
1461-
num_process_groups = 2
1462-
pg = create(num=num_process_groups, prefix=i)
1463-
for _ in range(3):
1464-
tensor = torch.ones([10, 10])
1465-
pg.allreduce(tensor).wait()
1466-
self.assertEqual(torch.full([10, 10], float(self.world_size)), tensor)
1467-
del pg
1468-
14691417

14701418
class DistributedDataParallelTest(
14711419
test_c10d_common.CommonDistributedDataParallelTest, MultiProcessTestCase

torch/_C/_distributed_c10d.pyi

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -336,10 +336,6 @@ class ProcessGroup:
336336

337337
class ProcessGroupRoundRobin(ProcessGroup): ...
338338

339-
def _round_robin_process_groups(
340-
process_groups: List[ProcessGroup],
341-
) -> ProcessGroupRoundRobin: ...
342-
343339
class ProcessGroupGloo(ProcessGroup):
344340
class Device: ...
345341
class Options: ...

torch/csrc/distributed/c10d/ProcessGroupRoundRobin.cpp

Lines changed: 0 additions & 134 deletions
This file was deleted.

torch/csrc/distributed/c10d/ProcessGroupRoundRobin.hpp

Lines changed: 0 additions & 113 deletions
This file was deleted.

torch/csrc/distributed/c10d/init.cpp

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
#include <torch/csrc/distributed/c10d/Utils.hpp>
77
#ifndef _WIN32
88
#include <torch/csrc/distributed/c10d/HashStore.hpp>
9-
#include <torch/csrc/distributed/c10d/ProcessGroupRoundRobin.hpp>
109
#endif
1110
#include <torch/csrc/distributed/c10d/ProcessGroup.hpp>
1211
#include <torch/csrc/distributed/c10d/PyProcessGroup.hpp>
@@ -1457,22 +1456,6 @@ options :class:`~torch.distributed.ProcessGroupNCCL.Options`).
14571456
.def_readonly("backend", &::c10d::ProcessGroup::Options::backend)
14581457
.def_readwrite("_timeout", &::c10d::ProcessGroup::Options::timeout);
14591458

1460-
#ifndef _WIN32
1461-
module.def(
1462-
"_round_robin_process_groups",
1463-
[](std::vector<c10::intrusive_ptr<::c10d::ProcessGroup>> processGroups)
1464-
-> c10::intrusive_ptr<::c10d::ProcessGroup> {
1465-
if (processGroups.size() == 0) {
1466-
throw std::invalid_argument("Specify at least 1 process group");
1467-
}
1468-
const auto& first = processGroups.front();
1469-
return c10::make_intrusive<::c10d::ProcessGroupRoundRobin>(
1470-
first->getRank(), first->getSize(), std::move(processGroups));
1471-
},
1472-
py::arg("process_groups"),
1473-
py::call_guard<py::gil_scoped_release>());
1474-
#endif
1475-
14761459
#ifdef USE_C10D_GLOO
14771460
static const std::string GLOO_SOCKET_IFNAME_ENV = "GLOO_SOCKET_IFNAME";
14781461

torch/distributed/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ def is_available() -> bool:
4949
if sys.platform != "win32":
5050
from torch._C._distributed_c10d import (
5151
HashStore,
52-
_round_robin_process_groups,
5352
)
5453

5554
from .distributed_c10d import * # noqa: F403

0 commit comments

Comments (0)