Skip to content

Commit abd3b44

Browse files
committed
Remove ProcessGroupRoundRobin
ghstack-source-id: 4e095a2
Pull Request resolved: #87088
1 parent cd9f451 commit abd3b44

File tree

7 files changed

+0
-322
lines changed

7 files changed

+0
-322
lines changed

build_variables.bzl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,6 @@ libtorch_distributed_extra_sources = [
510510
"torch/csrc/distributed/autograd/rpc_messages/rref_backward_req.cpp",
511511
"torch/csrc/distributed/autograd/rpc_messages/rref_backward_resp.cpp",
512512
"torch/csrc/distributed/c10d/HashStore.cpp",
513-
"torch/csrc/distributed/c10d/ProcessGroupRoundRobin.cpp",
514513
"torch/csrc/distributed/rpc/agent_utils.cpp",
515514
"torch/csrc/distributed/rpc/message.cpp",
516515
"torch/csrc/distributed/rpc/profiler/remote_profiler_manager.cpp",

test/distributed/test_c10d_gloo.py

Lines changed: 0 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
requires_gloo,
3737
skip_if_lt_x_gpu,
3838
simple_sparse_reduce_tests,
39-
skip_if_win32,
4039
create_device,
4140
verify_ddp_error_logged,
4241
)
@@ -1415,57 +1414,6 @@ def test_barrier_implies_wait(self):
14151414
for i, tensor in enumerate(tensors):
14161415
self.assertEqual(torch.full(size, float(i * self.world_size)), tensor)
14171416

1418-
@skip_if_win32()
1419-
@requires_gloo()
1420-
def test_round_robin(self):
1421-
num_process_groups = 2
1422-
store = c10d.FileStore(self.file_name, self.world_size)
1423-
pg = c10d._round_robin_process_groups(
1424-
[
1425-
c10d.ProcessGroupGloo(
1426-
c10d.PrefixStore(str(i), store),
1427-
self.rank,
1428-
self.world_size,
1429-
self.opts(),
1430-
)
1431-
for i in range(num_process_groups)
1432-
]
1433-
)
1434-
1435-
# Run a few collectives so that we have called each process group
1436-
for _ in range(num_process_groups + 1):
1437-
tensor = torch.full([100, 100], float(self.rank))
1438-
pg.broadcast(tensor, root=0).wait()
1439-
self.assertEqual(torch.full([100, 100], 0.0), tensor)
1440-
1441-
@skip_if_win32()
1442-
@requires_gloo()
1443-
def test_round_robin_create_destroy(self):
1444-
store = c10d.FileStore(self.file_name, self.world_size)
1445-
1446-
def create(num, prefix):
1447-
return c10d._round_robin_process_groups(
1448-
[
1449-
c10d.ProcessGroupGloo(
1450-
c10d.PrefixStore("%s/%d" % (prefix, i), store),
1451-
self.rank,
1452-
self.world_size,
1453-
self.opts(),
1454-
)
1455-
for i in range(num)
1456-
]
1457-
)
1458-
1459-
# Run create/use/destroy twice
1460-
for i in range(2):
1461-
num_process_groups = 2
1462-
pg = create(num=num_process_groups, prefix=i)
1463-
for _ in range(3):
1464-
tensor = torch.ones([10, 10])
1465-
pg.allreduce(tensor).wait()
1466-
self.assertEqual(torch.full([10, 10], float(self.world_size)), tensor)
1467-
del pg
1468-
14691417

14701418
class DistributedDataParallelTest(
14711419
test_c10d_common.CommonDistributedDataParallelTest, MultiProcessTestCase

torch/_C/_distributed_c10d.pyi

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -336,10 +336,6 @@ class ProcessGroup:
336336

337337
class ProcessGroupRoundRobin(ProcessGroup): ...
338338

339-
def _round_robin_process_groups(
340-
process_groups: List[ProcessGroup],
341-
) -> ProcessGroupRoundRobin: ...
342-
343339
class ProcessGroupGloo(ProcessGroup):
344340
class Device: ...
345341
class Options: ...

torch/csrc/distributed/c10d/ProcessGroupRoundRobin.cpp

Lines changed: 0 additions & 134 deletions
This file was deleted.

torch/csrc/distributed/c10d/ProcessGroupRoundRobin.hpp

Lines changed: 0 additions & 113 deletions
This file was deleted.

torch/csrc/distributed/c10d/init.cpp

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
#include <torch/csrc/distributed/c10d/Utils.hpp>
77
#ifndef _WIN32
88
#include <torch/csrc/distributed/c10d/HashStore.hpp>
9-
#include <torch/csrc/distributed/c10d/ProcessGroupRoundRobin.hpp>
109
#endif
1110
#include <torch/csrc/distributed/c10d/ProcessGroup.hpp>
1211
#include <torch/csrc/distributed/c10d/PyProcessGroup.hpp>
@@ -1457,22 +1456,6 @@ options :class:`~torch.distributed.ProcessGroupNCCL.Options`).
14571456
.def_readonly("backend", &::c10d::ProcessGroup::Options::backend)
14581457
.def_readwrite("_timeout", &::c10d::ProcessGroup::Options::timeout);
14591458

1460-
#ifndef _WIN32
1461-
module.def(
1462-
"_round_robin_process_groups",
1463-
[](std::vector<c10::intrusive_ptr<::c10d::ProcessGroup>> processGroups)
1464-
-> c10::intrusive_ptr<::c10d::ProcessGroup> {
1465-
if (processGroups.size() == 0) {
1466-
throw std::invalid_argument("Specify at least 1 process group");
1467-
}
1468-
const auto& first = processGroups.front();
1469-
return c10::make_intrusive<::c10d::ProcessGroupRoundRobin>(
1470-
first->getRank(), first->getSize(), std::move(processGroups));
1471-
},
1472-
py::arg("process_groups"),
1473-
py::call_guard<py::gil_scoped_release>());
1474-
#endif
1475-
14761459
#ifdef USE_C10D_GLOO
14771460
static const std::string GLOO_SOCKET_IFNAME_ENV = "GLOO_SOCKET_IFNAME";
14781461

torch/distributed/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ def is_available() -> bool:
4949
if sys.platform != "win32":
5050
from torch._C._distributed_c10d import (
5151
HashStore,
52-
_round_robin_process_groups,
5352
)
5453

5554
from .distributed_c10d import * # noqa: F403

0 commit comments

Comments (0)