 from torch.testing._internal.common_distributed import MultiProcessTestCase, \
     requires_gloo, requires_nccl, requires_nccl_version, \
     skip_if_not_multigpu, skip_if_lt_x_gpu, get_timeout, skip_if_rocm, \
-    simple_sparse_reduce_tests, skip_if_win32, create_device
+    skip_if_rocm_single_process, simple_sparse_reduce_tests, skip_if_win32, \
+    create_device
 
 from torch.testing._internal.common_utils import TestCase, load_tests, run_tests, \
     retry_on_connect_failures, ADDRESS_IN_USE, CONNECT_TIMEOUT, TEST_WITH_TSAN
@@ -1594,13 +1595,30 @@ def create(num, prefix):
         self.assertEqual(torch.full([10, 10], float(self.world_size)), tensor)
         del pg
 
+class ProcessGroupNCCLNoGPUTest(TestCase):
+    MAIN_PROCESS_RANK = 0
+
+    def setUp(self):
+        self.rank = self.MAIN_PROCESS_RANK
+        self.world_size = 1
+        self.file = tempfile.NamedTemporaryFile(delete=False)
+        self.num_gpus = torch.cuda.device_count()
+        if self.num_gpus > 0:
+            raise unittest.SkipTest("GPUs are available, skipping test")
+
+    def tearDown(self):
+        pass
+
+    @requires_nccl()
+    @skip_if_rocm_single_process
+    def test_init_no_gpus(self):
+        store = c10d.FileStore(self.file.name, self.world_size)
+        with self.assertRaisesRegex(
+            RuntimeError,
+            "ProcessGroupNCCL is only supported with GPUs, no GPUs found!"):
+            c10d.ProcessGroupNCCL(store, self.rank, self.world_size)
+
 
-@requires_nccl()
-@unittest.skipIf(
-    TEST_WITH_TSAN,
-    "TSAN is not fork-safe since we're forking in a multi-threaded environment",
-)
-@skip_if_rocm
 class ProcessGroupNCCLTest(TestCase):
     MAIN_PROCESS_RANK = 0
 
@@ -1615,6 +1633,8 @@ def setUp(self):
     def tearDown(self):
         pass
 
+    @requires_nccl()
+    @skip_if_rocm_single_process
     def test_empty_tensors(self):
         store = c10d.FileStore(self.file.name, self.world_size)
         pg = c10d.ProcessGroupNCCL(store, self.rank, self.world_size)
@@ -1639,6 +1659,8 @@ def test_empty_tensors(self):
         pg.reduce_scatter(ys, xs).wait()
         self.assertEqual(0, ys[0].numel())
 
+    @requires_nccl()
+    @skip_if_rocm_single_process
     def test_broadcast_ops(self):
         store = c10d.FileStore(self.file.name, self.world_size)
         pg = c10d.ProcessGroupNCCL(store, self.rank, self.world_size)
@@ -1661,6 +1683,8 @@ def broadcast(xs, rootRank, rootTensor):
             for i in range(self.num_gpus):
                 self.assertEqual(tensors[i], tensors[rt])
 
+    @requires_nccl()
+    @skip_if_rocm_single_process
     def test_allreduce_ops(self):
         store = c10d.FileStore(self.file.name, self.world_size)
         pg = c10d.ProcessGroupNCCL(store, self.rank, self.world_size)
@@ -1722,6 +1746,8 @@ def allreduce(tensors, op):
             with self.assertRaisesRegex(RuntimeError, "Cannot use " + str(op) + " with NCCL"):
                 allreduce(tensors, op)
 
+    @requires_nccl()
+    @skip_if_rocm_single_process
     def test_reduce_ops(self):
         store = c10d.FileStore(self.file.name, self.world_size)
         pg = c10d.ProcessGroupNCCL(store, self.rank, self.world_size)
@@ -1752,6 +1778,8 @@ def reduce(xs, rootRank, rootTensor, op=None):
                 with self.assertRaisesRegex(RuntimeError, "Cannot use " + str(op) + " with NCCL"):
                     reduce(tensors, self.rank, rt, op)
 
+    @requires_nccl()
+    @skip_if_rocm_single_process
     def test_allgather_ops(self):
         store = c10d.FileStore(self.file.name, self.world_size)
         pg = c10d.ProcessGroupNCCL(store, self.rank, self.world_size)
@@ -1777,6 +1805,8 @@ def allgather(output_ts, input_ts):
             for s_idx, t in enumerate(device_ts):
                 self.assertEqual(torch.tensor([s_idx]), t)
 
+    @requires_nccl()
+    @skip_if_rocm_single_process
     def test_reduce_scatter_ops(self):
         store = c10d.FileStore(self.file.name, self.world_size)
         pg = c10d.ProcessGroupNCCL(store, self.rank, self.world_size)
@@ -1854,6 +1884,8 @@ def reduce_scatter(outputs, input_lists, op):
             # TODO(#38095): Replace assertEqualIgnoreType. See issue #38095
             self.assertEqualIgnoreType(expected, output[i])
 
+    @requires_nccl()
+    @skip_if_rocm_single_process
     def test_barrier(self):
         store = c10d.FileStore(self.file.name, self.world_size)
         pg = c10d.ProcessGroupNCCL(store, self.rank, self.world_size)
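
Note (not part of the patch): a minimal standalone sketch of the no-GPU error path that the new ProcessGroupNCCLNoGPUTest covers, outside the unittest harness. It assumes a PyTorch build with distributed/NCCL support and uses `import torch.distributed as c10d`, matching the alias used in the test file; the error message shown is the one the test asserts on.

```python
# Minimal sketch (assumes a NCCL-enabled PyTorch build); mirrors what
# test_init_no_gpus asserts: constructing ProcessGroupNCCL on a machine
# without GPUs should raise a RuntimeError.
import tempfile

import torch
import torch.distributed as c10d


def try_nccl_without_gpus():
    if torch.cuda.device_count() > 0:
        print("GPUs are available; the no-GPU error path does not apply.")
        return
    # Single-process rendezvous through a FileStore, as in the test.
    store = c10d.FileStore(tempfile.NamedTemporaryFile(delete=False).name, 1)
    try:
        c10d.ProcessGroupNCCL(store, 0, 1)  # rank 0, world_size 1
    except RuntimeError as err:
        # Expected: "ProcessGroupNCCL is only supported with GPUs, no GPUs found!"
        print(f"ProcessGroupNCCL raised as expected: {err}")


if __name__ == "__main__":
    try_nccl_without_gpus()
```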