
Commit de1ad0a

[PT-D] Enable Meta Tensor Support for DTensor
ghstack-source-id: 7b2b65c
Pull Request resolved: #92652

4 files changed: +31 −16 lines


test/distributed/_tensor/test_dtensor.py

Lines changed: 15 additions & 14 deletions
@@ -13,20 +13,6 @@
 
 
 class DTensorTest(DTensorTestBase):
-    # @with_comms
-    # def test_tensor_constructor(self):
-    #     import torch.distributed._tensor as dist_tensor
-    #     shard_spec = PlacementSpec(device_mesh, strategies=[Shard(0)])
-    #     empty_tensor = dist_tensor.empty((12, 10), placement_spec=shard_spec)
-    #     zero_tensor = dist_tensor.zeros((12, 10), placement_spec=shard_spec)
-    #     one_tensor = dist_tensor.ones((12, 10), placement_spec=shard_spec)
-
-    #     zero_cuda_tensor = dist_tensor.zeros((12, 10), device="cuda", placement_spec=shard_spec)
-
-    #     dist_tensor.empty_like(empty_tensor)
-    #     dist_tensor.zero_like(empty_tensor)
-    #     dist_tensor.one_like(empty_tensor)
-
     @with_comms
     def test_dtensor_constructor(self):
         device_mesh = DeviceMesh(self.device_type, list(range(self.world_size)))
@@ -55,6 +41,21 @@ def test_dtensor_constructor(self):
             requires_grad=True,
         )
 
+    @with_comms
+    def test_meta_dtensor(self):
+        device_mesh = DeviceMesh(self.device_type, list(range(self.world_size)))
+        dist_specs = [[Shard(0)], [Replicate()]]
+        meta_tensor = torch.randn(1024, 2048, device="meta")
+        for dist_spec in dist_specs:
+            # Test distribute_tensor on meta tensor
+            meta_dtensor = distribute_tensor(meta_tensor, device_mesh, dist_spec)
+            torch.nn.init.constant_(meta_dtensor, 1.2)
+            self.assertEqual(meta_dtensor.device.type, self.device_type)
+            # Test from_local on meta tensor
+            meta_dtensor = DTensor.from_local(meta_tensor, device_mesh, dist_spec)
+            torch.nn.init.constant_(meta_dtensor, 1.5)
+            self.assertEqual(meta_dtensor.device.type, self.device_type)
+
     @with_comms
     def test_dtensor_stride(self):
         device_mesh = DeviceMesh(self.device_type, list(range(self.world_size)))
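
For context, the new test_meta_dtensor exercises the two entry points that now accept meta tensors. Below is a minimal standalone sketch of the same usage pattern, not part of the commit; it assumes a process group is already initialized and uses the torch.distributed._tensor API the test imports.

import torch
import torch.distributed as dist
from torch.distributed._tensor import (
    DeviceMesh,
    DTensor,
    Replicate,
    Shard,
    distribute_tensor,
)

# Assumes torch.distributed is already initialized (e.g. launched via torchrun).
mesh = DeviceMesh("cuda", list(range(dist.get_world_size())))

# Allocate on the meta device: shape and dtype only, no storage, no data movement.
big = torch.randn(1024, 2048, device="meta")

# Both paths exercised by the test keep the meta tensor as-is instead of
# copying it to the mesh's device type up front.
sharded = distribute_tensor(big, mesh, [Shard(0)])
replicated = DTensor.from_local(big, mesh, [Replicate()])

# The test then materializes the meta DTensor with an in-place init and
# checks that it ends up on the mesh's real device type.
torch.nn.init.constant_(sharded, 1.2)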

torch/distributed/_tensor/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -40,7 +40,8 @@ def distribute_tensor(
     # get default device mesh if there's nothing specified
     device_mesh = get_global_device_mesh() if device_mesh is None else device_mesh
     # convert tensor to the correponding device type if it's not in that device type
-    tensor = tensor.to(device_mesh.device_type)
+    if not tensor.is_meta:
+        tensor = tensor.to(device_mesh.device_type)
     # set default placements to replicated if not specified
     if placements is None:
         placements = [Replicate() for _ in range(device_mesh.ndim)]
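
The guard matters because a meta tensor carries no storage: copying it onto the mesh's device with .to() would fail since there is no data to copy, so the conversion is skipped and the tensor stays on the meta device. A small standalone illustration of the guarded branch, where device_type is just a stand-in for device_mesh.device_type:

import torch

t = torch.empty(4, 4, device="meta")
print(t.is_meta)  # True

device_type = "cpu"  # stand-in for device_mesh.device_type
if not t.is_meta:
    # Never taken for meta tensors; t.to(device_type) on a meta tensor
    # would raise because there is no underlying data to copy out.
    t = t.to(device_type)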

torch/distributed/_tensor/api.py

Lines changed: 2 additions & 1 deletion
@@ -277,7 +277,8 @@ def from_local(
         # in the mesh dimension
         device_mesh = get_global_device_mesh() if device_mesh is None else device_mesh
         # convert the local tensor to desired device base on device mesh's device_type
-        local_tensor = local_tensor.to(device_mesh.device_type)
+        if not local_tensor.is_meta:
+            local_tensor = local_tensor.to(device_mesh.device_type)
 
         # set default placements to replicated if not specified
         if placements is None:
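
DTensor.from_local gets the same treatment as distribute_tensor: the local shard is only moved to the mesh's device type when it is not a meta tensor. One way to read the shared pattern, as a hypothetical helper (maybe_to_mesh_device is illustrative only, not part of the commit):

import torch

def maybe_to_mesh_device(t: torch.Tensor, device_type: str) -> torch.Tensor:
    # Hypothetical helper mirroring the guard added in both call sites:
    # leave meta tensors untouched, otherwise move to the mesh's device type.
    if t.is_meta:
        return t
    return t.to(device_type)

local = torch.randn(8, 8, device="meta")
local = maybe_to_mesh_device(local, "cuda")  # stays on meta, no allocation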

torch/distributed/_tensor/device_mesh.py

Lines changed: 12 additions & 0 deletions
@@ -283,6 +283,12 @@ def scatter(
         Returns:
             A :class:`Work` object
         """
+        # TODO: Ideally we should use the meta tensor way
+        # (to register a meta kernel for the collective op)
+        # so that it would avoid the communication. Need to
+        # remove the check below once that is done.
+        if output.is_meta:
+            return None
         dim_group = self._dim_groups[mesh_dim]
         # src need to be global rank
         src_for_dim = 0
@@ -330,6 +336,12 @@ def broadcast(
         Returns:
             A :class:`Work` object
         """
+        # TODO: Ideally we should use the meta tensor way
+        # (to register a meta kernel for the collective op)
+        # so that it would avoid the communication. Need to
+        # remove the check below once that is done.
+        if tensor.is_meta:
+            return None
         dim_group = self._dim_groups[mesh_dim]
         # src need to be global rank
         src_for_dim = 0
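
The scatter and broadcast changes short-circuit the collectives entirely when the tensor lives on the meta device, since there is nothing to communicate; the TODO notes that registering a meta kernel for the collective op would be the cleaner long-term fix. A simplified standalone sketch of the same early-return pattern (broadcast_unless_meta is illustrative, not part of the DeviceMesh API):

import torch
import torch.distributed as dist

def broadcast_unless_meta(tensor: torch.Tensor, src: int, group=None):
    # Mirror the check the commit adds: a meta tensor has no data, so the
    # collective is skipped and no Work handle is returned.
    if tensor.is_meta:
        return None
    return dist.broadcast(tensor, src=src, group=group, async_op=True)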
