
Commit d05ec0e

wanchaol authored and pytorchmergebot committed
[dtensor] add split_with_sizes op (#93957)

Add the split_with_sizes op, sharing the implementation with the split op.

Pull Request resolved: #93957
Approved by: https://github.com/XilunWu
1 parent bfe5e12 · commit d05ec0e

File tree: 2 files changed, +15 −13 lines

test/distributed/_tensor/test_dtensor_ops.py (14 additions, 12 deletions)

@@ -451,8 +451,6 @@ def wrapped(fn):
     xfail("special.spherical_bessel_j0"),
     xfail("special.xlog1py"),
     xfail("special.zeta"),
-    xfail("split", "list_args"),
-    xfail("split_with_sizes"),
     xfail("squeeze", "multiple"),
     xfail("signal.windows.bartlett"),
     xfail("signal.windows.blackman"),
@@ -617,13 +615,21 @@ def assert_ref_dtensor_equal(self, dtensor_rs, rs):
     def run_dtensor_crossref(self, func, args, kwargs):
         to_dtensor = DTensorConverter(self.mesh, args, kwargs)
 
+        def concat_res_if_necessary(func, res: object) -> object:
+            # concat the result on corresponding dim for ops like
+            # split, so that we can call backward on a single tensor
+            if (
+                (resolve_name(func) is not None)
+                and ("split" in resolve_name(func))
+            ):
+                dim = args[2] if len(args) == 3 else 0
+                return torch.cat(res, dim=dim)
+            else:
+                return res
+
         # TODO: also handle cases where func raise an exception
         rs = func(*args, **kwargs)
-        if (
-            (resolve_name(func) is not None)
-            and ("split" in resolve_name(func))
-        ):
-            rs = torch.cat(rs)
+        rs = concat_res_if_necessary(func, rs)
 
         def to_replicate(e: object) -> object:
             return (
@@ -664,11 +670,7 @@ def to_replicate(e: object) -> object:
 
         # redistribute/all_gather the results to compare with normal output
         dtensor_rs = tree_map(to_replicate, dtensor_rs)
-        if (
-            (resolve_name(func) is not None)
-            and ("split" in resolve_name(func))
-        ):
-            dtensor_rs = torch.cat(dtensor_rs)
+        dtensor_rs = concat_res_if_necessary(func, dtensor_rs)
         try:
             if resolve_name(func) not in skip_bw:
                 if isinstance(dtensor_rs, DTensor):
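
As context for the concat_res_if_necessary helper above, a minimal sketch (plain torch, outside the test harness; all names illustrative) of why split results must be re-joined before backward: split-style ops return a tuple of tensors, and the cross-ref test needs a single tensor to call backward on.

import torch

x = torch.arange(8.0, requires_grad=True)
pieces = torch.split(x, [3, 5], dim=0)  # split returns a tuple of tensors
joined = torch.cat(pieces, dim=0)       # re-join on the split dim...
joined.sum().backward()                 # ...so backward runs on one tensor
print(x.grad)                           # tensor of ones, shape (8,)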

torch/distributed/_tensor/ops/tensor_ops.py (1 addition, 1 deletion)

@@ -600,7 +600,7 @@ def _update_schema_suggestion_for_cat(
     return output_sharding
 
 
-@register_prop_rule(aten.split.Tensor)
+@register_prop_rule([aten.split.Tensor, aten.split_with_sizes.default])
 def split_rule(op_schema: OpSchema) -> OutputSharding:
     output_spec_list: List[DTensorSpec] = []
     input_spec = cast(DTensorSpec, op_schema.args_schema[0])
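
The one-line change above reuses the existing split_rule for both overloads. As a hedged illustration of the two ATen ops it now covers (plain eager PyTorch, not DTensor): torch.split with an int chunk size dispatches to aten.split.Tensor, while a list of explicit sizes dispatches to aten.split_with_sizes.

import torch

x = torch.arange(10.0)
# int chunk size -> aten.split.Tensor: equal chunks, last may be smaller
a, b, c = torch.split(x, 4)          # sizes (4, 4, 2)
# explicit sizes -> aten.split_with_sizes: chunks may be unequal
d, e, f = torch.split(x, [2, 3, 5])  # sizes (2, 3, 5)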
