
Commit d6a958b

[dtensor][5/N] change to a better/safer op registration
This PR changes op registration to a better, safer mechanism: rules are now registered against the OpOverload directly instead of an op key string. This has several benefits:

1. We ensure that each registration targets the correct op, so a wrong registration fails immediately (switching to direct OpOverload registration already uncovered and fixed several registration errors in this PR).
2. If an overload name is changed or deleted, we find out immediately at import time rather than silently at dispatch time, which is safer.
3. It keeps the op registration mechanism consistent with the other tensor subclasses in PyTorch.

ghstack-source-id: 3c4d812
Pull Request resolved: #90735
1 parent 9a8632b commit d6a958b
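
To make benefits (1) and (2) concrete: torch.ops.aten.<op>.<overload> resolves to an OpOverload object, and a misspelled overload name raises immediately instead of silently registering a rule that never matches. A minimal, standalone illustration (not part of this PR; exact types and error messages may vary by PyTorch version):

    import torch

    aten = torch.ops.aten

    op = aten.sum.dim_IntList        # a real OpOverload object
    print(type(op))                  # e.g. <class 'torch._ops.OpOverload'>

    try:
        aten.sum.dim_intlist         # wrong overload name
    except AttributeError as err:
        # Fails as soon as this line runs, i.e. at module import time for
        # decorator-style registrations like the ones in this PR.
        print("bad overload rejected:", err)

    # With the old string keys, a typo such as "aten.sum.dim_intlist" would
    # typically be accepted and the rule would simply never be looked up.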


7 files changed: +419 −420 lines changed


torch/distributed/_tensor/ops/math_ops.py

Lines changed: 23 additions & 33 deletions
@@ -1,6 +1,8 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates
 from typing import cast, Optional, Sequence
 
+import torch
+
 from torch.distributed._tensor.op_schema import OpSchema, OutputSharding
 from torch.distributed._tensor.ops.common_rules import pointwise_rule, reduction_rule
 from torch.distributed._tensor.ops.utils import (
@@ -11,6 +13,9 @@
 from torch.distributed._tensor.placement_types import DTensorSpec
 
 
+aten = torch.ops.aten
+
+
 def _infer_reduction_dims(dims_arg: object, ndim: int) -> Optional[Sequence[int]]:
     if dims_arg is None:
         return None
@@ -22,11 +27,17 @@ def _infer_reduction_dims(dims_arg: object, ndim: int) -> Optional[Sequence[int]]:
     return dims
 
 
-@register_prop_rule("aten.all.default")
+@register_prop_rule(aten.all.default)
 def default_reduction_rule(op_schema: OpSchema) -> OutputSharding:
     return reduction_rule(op_schema, reduction_linear=True)
 
 
+@register_prop_rule(
+    [
+        aten.sum.default,
+        aten.sum.dim_IntList,
+    ]
+)
 def sum_rule(op_schema: OpSchema) -> OutputSharding:
     args_schema = op_schema.args_schema
     input_spec = cast(DTensorSpec, args_schema[0])
@@ -40,15 +51,7 @@ def sum_rule(op_schema: OpSchema) -> OutputSharding:
     )
 
 
-sum_ops = [
-    "aten.sum.default",
-    "aten.sum.dim_IntList",
-]
-for sum_op in sum_ops:
-    register_prop_rule(sum_op)(sum_rule)
-
-
-@register_prop_rule("aten._softmax.default")
+@register_prop_rule(aten._softmax.default)
 def softmax_rule(op_schema: OpSchema) -> OutputSharding:
     input_spec, softmax_dim, _ = op_schema.args_schema
     input_spec = cast(DTensorSpec, input_spec)
@@ -59,7 +62,7 @@ def softmax_rule(op_schema: OpSchema) -> OutputSharding:
     return OutputSharding(input_spec)
 
 
-@register_prop_rule("aten._softmax_backward_data.default")
+@register_prop_rule(aten._softmax_backward_data.default)
 def softmax_bwd_rule(op_schema: OpSchema) -> OutputSharding:
     grad_out_spec, out_spec, softmax_dim, _ = op_schema.args_schema
     grad_out_spec = cast(DTensorSpec, grad_out_spec)
@@ -74,6 +77,7 @@ def softmax_bwd_rule(op_schema: OpSchema) -> OutputSharding:
     return pointwise_rule(op_schema)
 
 
+@register_prop_rule([aten.mean.default, aten.mean.dim, aten.mean.out])
 def mean_rule(op_schema: OpSchema) -> OutputSharding:
     args_schema = op_schema.args_schema
     input_spec = cast(DTensorSpec, args_schema[0])
@@ -88,16 +92,13 @@ def mean_rule(op_schema: OpSchema) -> OutputSharding:
     )
 
 
-mean_ops = [
-    "aten.mean.default",
-    "aten.mean.dim",
-    "aten.mean.out",
-]
-
-for mean_op in mean_ops:
-    register_prop_rule(mean_op)(mean_rule)
-
-
+@register_prop_rule(
+    [
+        aten.var.default,
+        aten.var.dim,
+        aten.var.out,
+    ]
+)
 def var_rule(op_schema: OpSchema) -> OutputSharding:
     args_schema = op_schema.args_schema
     input_spec = cast(DTensorSpec, args_schema[0])
@@ -114,18 +115,7 @@ def var_rule(op_schema: OpSchema) -> OutputSharding:
     )
 
 
-var_ops = [
-    "aten.var.default",
-    "aten.var.dim",
-    "aten.var.out",
-]
-
-for var_op in var_ops:
-    register_prop_rule(var_op)(var_rule)
-
-
-@register_prop_rule("aten.var.correction")
-@register_prop_rule("aten.var.correction_out")
+@register_prop_rule([aten.var.correction, aten.var.correction_out])
 def var_correction_rule(op_schema: OpSchema) -> OutputSharding:
     args_schema = op_schema.args_schema
     input_spec = cast(DTensorSpec, args_schema[0])
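
The list arguments used above (e.g. [aten.sum.default, aten.sum.dim_IntList]) replace the old for-loops over string keys. As a rough sketch of how a decorator can accept either one OpOverload or a list of them, here is a hypothetical register_rule helper; it is not the actual torch.distributed._tensor.ops.utils.register_prop_rule implementation, only an illustration of the mechanism:

    from typing import Callable, Dict, List, Union

    import torch
    from torch._ops import OpOverload

    aten = torch.ops.aten

    # Illustrative registry keyed by OpOverload objects rather than strings.
    _rule_table: Dict[OpOverload, Callable] = {}

    def register_rule(op: Union[OpOverload, List[OpOverload]]) -> Callable:
        """Register a propagation rule for one or more OpOverloads."""
        def wrapper(fn: Callable) -> Callable:
            overloads = op if isinstance(op, list) else [op]
            for overload in overloads:
                _rule_table[overload] = fn
            return fn
        return wrapper

    @register_rule([aten.sum.default, aten.sum.dim_IntList])
    def sum_rule(op_schema):
        ...  # sharding propagation logic elided

Looking up a rule later is then a plain dictionary access on the dispatched OpOverload, with no string formatting or parsing involved.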

torch/distributed/_tensor/ops/matrix_ops.py

Lines changed: 10 additions & 5 deletions
@@ -1,9 +1,14 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates
 # implement matrix related ops for distributed tensor
+
+import torch
+
 from torch.distributed._tensor.op_schema import OpSchema, OutputSharding
 from torch.distributed._tensor.ops.common_rules import einop_rule, pointwise_rule
 from torch.distributed._tensor.ops.utils import register_prop_rule
 
+aten = torch.ops.aten
+
 
 def _update_schema_suggestion_for_addmm(
     output_sharding: OutputSharding,
@@ -41,12 +46,12 @@ def _update_schema_suggestion_for_addmm(
     return output_sharding
 
 
-@register_prop_rule("aten.mm.default")
+@register_prop_rule(aten.mm.default)
 def mm_rules(op_schema: OpSchema) -> OutputSharding:
     return einop_rule("mk,kn->mn", op_schema, linearity=False)
 
 
-@register_prop_rule("aten.addmm.default")
+@register_prop_rule(aten.addmm.default)
 def addmm_rules(op_schema: OpSchema) -> OutputSharding:
     input_spec, mat1_spec, mat2_spec = op_schema.args_spec
     mm_out_sharding = mm_rules(
@@ -80,17 +85,17 @@ def addmm_rules(op_schema: OpSchema) -> OutputSharding:
     return output_sharding
 
 
-@register_prop_rule("aten.t.default")
+@register_prop_rule(aten.t.default)
 def transpose_rule(op_schema: OpSchema) -> OutputSharding:
     return einop_rule("ij->ji", op_schema, linearity=True)
 
 
-@register_prop_rule("aten.bmm.default")
+@register_prop_rule(aten.bmm.default)
 def bmm_rules(op_schema: OpSchema) -> OutputSharding:
     return einop_rule("bmk,bkn->bmn", op_schema, linearity=False)
 
 
-@register_prop_rule("aten.baddbmm.default")
+@register_prop_rule(aten.baddbmm.default)
 def baddbmm_rules(op_schema: OpSchema) -> OutputSharding:
     input_spec, mat1_spec, mat2_spec = op_schema.args_spec
     bmm_output_sharding = bmm_rules(
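
Benefit (3) in the commit message refers to the fact that tensor-subclass dispatch (__torch_dispatch__) hands handlers the OpOverload object itself, so a table keyed by OpOverloads can be indexed directly by the dispatched op. A small, self-contained sketch of that lookup pattern (illustrative names only, not DTensor's actual tables):

    import torch

    aten = torch.ops.aten

    # Rules keyed by OpOverload objects, mirroring the registrations above.
    prop_rules = {
        aten.mm.default: lambda op_schema: "mm sharding rule",
        aten.t.default: lambda op_schema: "transpose sharding rule",
    }

    # In a __torch_dispatch__ handler the incoming func is an OpOverload,
    # so the lookup is a direct dictionary access with no string parsing.
    func = aten.mm.default
    print(prop_rules[func](None))    # -> "mm sharding rule"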
