Skip to content

Commit e580a66

Browse files
H-Huang authored and facebook-github-bot committed
Update ProcessGroupRoundRobin (#91172)
Summary:
Pull Request resolved: #91172
Temporary fix to unblock jobs in https://fb.workplace.com/groups/300451907202972/permalink/906337097050850/
The real fix would be to remove use of the `_round_robin_process_groups` API and update the corresponding references (e.g. PyText).
Test Plan: sandcastle
Differential Revision: D42169592
fbshipit-source-id: 69b40e4d6f8da7727a0bf8bec80e5c8530f6fabc
1 parent 3194281 commit e580a66

File tree

4 files changed

+11
-13
lines changed

4 files changed

+11
-13
lines changed

test/distributed/test_c10d_gloo.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1415,11 +1415,10 @@ def test_barrier_implies_wait(self):
14151415
def test_round_robin(self):
14161416
num_process_groups = 2
14171417
store = c10d.FileStore(self.file_name, self.world_size)
1418+
c10d.init_process_group(backend="gloo", store=store, rank=self.rank, world_size=self.world_size)
14181419
pg = c10d._round_robin_process_groups(
14191420
[
1420-
self._create_process_group_gloo(
1421-
c10d.PrefixStore(str(i), store), self.rank, self.world_size, self.opts()
1422-
)
1421+
c10d.new_group(pg_options=self.opts())
14231422
for i in range(num_process_groups)
14241423
]
14251424
)
@@ -1434,13 +1433,12 @@ def test_round_robin(self):
14341433
@requires_gloo()
14351434
def test_round_robin_create_destroy(self):
14361435
store = c10d.FileStore(self.file_name, self.world_size)
1436+
c10d.init_process_group(backend="gloo", store=store, rank=self.rank, world_size=self.world_size)
14371437

14381438
def create(num, prefix):
14391439
return c10d._round_robin_process_groups(
14401440
[
1441-
self._create_process_group_gloo(
1442-
c10d.PrefixStore("%s/%d" % (prefix, i), store), self.rank, self.world_size, self.opts()
1443-
)
1441+
c10d.new_group(pg_options=self.opts())
14441442
for i in range(num)
14451443
]
14461444
)

torch/csrc/distributed/c10d/ProcessGroupRoundRobin.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ namespace c10d {
55
ProcessGroupRoundRobin::ProcessGroupRoundRobin(
66
int rank,
77
int size,
8-
std::vector<c10::intrusive_ptr<Backend>> processGroups)
8+
std::vector<c10::intrusive_ptr<ProcessGroup>> processGroups)
99
: ProcessGroup(rank, size), processGroups_(std::move(processGroups)) {
1010
TORCH_WARN(
1111
"ProcessGroupRoundRobin is deprecated and scheduled to be removed after this current release (1.13). ",
@@ -114,7 +114,7 @@ c10::intrusive_ptr<Work> ProcessGroupRoundRobin::barrier(
114114
TORCH_CHECK(false, "ProcessGroupRoundRobin does not support barrier");
115115
};
116116

117-
const c10::intrusive_ptr<Backend>& ProcessGroupRoundRobin::next() {
117+
const c10::intrusive_ptr<ProcessGroup>& ProcessGroupRoundRobin::next() {
118118
auto& processGroup = *iterator_;
119119
iterator_++;
120120
if (iterator_ == processGroups_.end()) {

torch/csrc/distributed/c10d/ProcessGroupRoundRobin.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ class TORCH_API ProcessGroupRoundRobin final : public ProcessGroup {
2323
explicit ProcessGroupRoundRobin(
2424
int rank,
2525
int size,
26-
std::vector<c10::intrusive_ptr<Backend>> processGroups);
26+
std::vector<c10::intrusive_ptr<ProcessGroup>> processGroups);
2727

2828
~ProcessGroupRoundRobin() override;
2929

@@ -103,11 +103,11 @@ class TORCH_API ProcessGroupRoundRobin final : public ProcessGroup {
103103
const BarrierOptions& opts = BarrierOptions()) override;
104104

105105
private:
106-
std::vector<c10::intrusive_ptr<Backend>> processGroups_;
107-
std::vector<c10::intrusive_ptr<Backend>>::const_iterator iterator_;
106+
std::vector<c10::intrusive_ptr<ProcessGroup>> processGroups_;
107+
std::vector<c10::intrusive_ptr<ProcessGroup>>::const_iterator iterator_;
108108

109109
// Returns the next ProcessGroup to use.
110-
const c10::intrusive_ptr<Backend>& next();
110+
const c10::intrusive_ptr<ProcessGroup>& next();
111111
};
112112

113113
} // namespace c10d

torch/csrc/distributed/c10d/init.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1627,7 +1627,7 @@ options :class:`~torch.distributed.ProcessGroupNCCL.Options`).
16271627
#ifndef _WIN32
16281628
module.def(
16291629
"_round_robin_process_groups",
1630-
[](std::vector<c10::intrusive_ptr<::c10d::Backend>> processGroups)
1630+
[](std::vector<c10::intrusive_ptr<::c10d::ProcessGroup>> processGroups)
16311631
-> c10::intrusive_ptr<::c10d::ProcessGroup> {
16321632
if (processGroups.size() == 0) {
16331633
throw std::invalid_argument("Specify at least 1 process group");

0 commit comments

Comments
 (0)