pytorch · XilunWu · Jan 5, 2023 · Jan 5, 2023 · Jan 5, 2023 · Jan 5, 2023
diff --git a/test/distributed/_tensor/test_device_mesh.py b/test/distributed/_tensor/test_device_mesh.py
@@ -1,5 +1,7 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates
 # Owner(s): ["oncall: distributed"]
+import os
+import sys
 
 import torch
 from torch.distributed._tensor.device_mesh import DeviceMesh
@@ -8,6 +10,7 @@
 from torch.distributed.distributed_c10d import (
     get_global_rank,
     get_world_size,
+    is_initialized,
     new_group,
     ProcessGroup,
 )
@@ -16,13 +19,30 @@
     DTensorTestBase,
     with_comms,
 )
+from torch.testing._internal.common_distributed import TEST_SKIPS
 
 
 class DeviceMeshTest(DTensorTestBase):
     @property
     def world_size(self):
         return 8
 
+    def test_init_process_group(self):
+        """DeviceMesh should initialize the default process group when none exists."""
+        device_type = "cuda" if torch.cuda.is_available() else "cpu"
+        # cuda implies the nccl backend: skip the test if not enough GPUs
+        if device_type == "cuda" and torch.cuda.device_count() < self.world_size:
+            sys.exit(TEST_SKIPS[f"multi-gpu-{self.world_size}"].exit_code)
+        self.assertFalse(is_initialized())
+        # env:// rendezvous settings consumed by init_process_group inside DeviceMesh
+        os.environ["MASTER_ADDR"] = "localhost"
+        os.environ["MASTER_PORT"] = "25364"
+        os.environ["WORLD_SIZE"] = f"{self.world_size}"
+        os.environ["RANK"] = f"{self.rank}"
+        DeviceMesh(device_type, torch.arange(4).reshape(2, 2))
+        self.assertTrue(is_initialized())
+        self.destroy_pg()
+
     @with_comms
     def test_device_mesh_2d(self):
         mesh_tensor = torch.arange(4).reshape(2, 2)

diff --git a/torch/distributed/_tensor/device_mesh.py b/torch/distributed/_tensor/device_mesh.py
@@ -13,6 +13,8 @@
     get_rank,
     get_world_size,
     GroupMember,
+    init_process_group,
+    is_initialized,
     new_group,
     ProcessGroup,
     reduce_scatter,
@@ -109,7 +111,7 @@ def __init__(
             if isinstance(mesh, torch.Tensor)
             else torch.tensor(mesh, dtype=torch.int)
         )
-        default_pg = _get_default_group()
+        default_pg = self._get_or_create_default_group()
         self._backend = default_pg._get_backend_name()
         # TODO: if user want to pass pg_options, offer a way to do it
         # check default pg backend, should support device_type
@@ -215,6 +217,12 @@ def __init__(
                             )
                         self._dim_groups.append(new_subgroup)
 
+    def _get_or_create_default_group(self):
+        """Return the default process group, initializing it first if none exists."""
+        if not is_initialized():  # no default group yet: gloo for cpu, else nccl
+            init_process_group(backend="gloo" if self.device_type == "cpu" else "nccl")
+        return _get_default_group()
+
     def __enter__(self) -> "DeviceMesh":
         # set global device_mesh to this instance
         set_global_device_mesh(self)