@@ -5,7 +5,10 @@
 from torch.fx.node import Target
 from torch_tensorrt.dynamo._SourceIR import SourceIR
 from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext
-from torch_tensorrt.dynamo.conversion.converter_utils import extend_attr_to_tuple
+from torch_tensorrt.dynamo.conversion.converter_utils import (
+    extend_attr_to_tuple,
+    get_positive_dim,
+)
 from torch_tensorrt.fx.converters.converter_utils import (
     has_dynamic_shape,
     set_layer_name,
@@ -116,37 +119,69 @@ def adaptive_avg_poolNd(
     output_size: Sequence[int],
 ) -> TRTTensor:
     input_rank = len(input.shape)
-    if input_rank == 3:
-        input = impl.shuffle.reshape(ctx, target, source_ir, f"{name}_reshape", input, (1, *input.shape))
+
+    if input_rank == 3:  # TRT doesn't support 3D pooling
+        input = impl.shuffle.reshape(
+            ctx, target, source_ir, f"{name}_reshape", input, (1, *input.shape)
+        )
 
     extend_len = len(output_size)
+    output_size = list(output_size)
+    original_input = input
 
-    # pad the input based on output_size if the dim of output is larger than input
-    pad = []
+    # repeat_interleave the input if the dim of output is larger than input
     input_shape = input.shape
-    for i in range(1, extend_len + 1):
-        input_dim = input_shape[-i]
-        output_dim = output_size[-i]
+    insert_axises = []
+    for axis in range(1, extend_len + 1):
+        axis = -axis
+        positive_axis = get_positive_dim(
+            axis, input_rank
+        )  # this is for calculating new shapes below
+        input_dim = input_shape[axis]
+        output_dim = output_size[axis]
         diff = output_dim - input_dim
-        if diff > 0:
-            if diff % 2 == 0:
-                pad.append(diff // 2)
-                pad.append(diff // 2)
-            else:
-                pad.append(diff // 2 + 1)
-                pad.append(diff // 2 + 1)
-        else:
-            pad.append(0)
-            pad.append(0)
-
-    input = impl.pad.replication_padNd(
-        ctx,
-        target,
-        source_ir,
-        f"{name}_replication_padNd",
-        input,
-        pad,
-    )
+        if diff > 0:  # the dim of output is larger than input
+            times = output_dim // input_dim
+            remainder = output_dim % input_dim
+            if (
+                diff == 2 and remainder == 2
+            ):  # case 1: output_dim - input_dim == 2 and is not an integral multiple
+                insert_axises.append(axis)
+                remainder -= 1
+                output_size[axis] -= 1
+
+            if (
+                remainder + 1 == input_dim
+            ):  # case 2: remainder + 1 == input_dim, we will repeat_interleave the whole input
+                remainder = 0
+                times += 1
+
+            flags = []
+            concat_list = []
+            for j in range(input_dim):
+                single_elem = impl.select.select(
+                    ctx, target, source_ir, f"{name}_select_{axis}_{j}", input, axis, j
+                )
+                new_shape = list(single_elem.shape)
+                new_shape.insert(positive_axis, 1)
+                single_elem = impl.shuffle.reshape(
+                    ctx,
+                    target,
+                    source_ir,
+                    f"{name}_reshape_{axis}_{j}",
+                    single_elem,
+                    new_shape,
+                )
+                if remainder > 0 or j in flags:
+                    concat_list.extend([single_elem] * (times + 1))
+                    remainder -= 2
+                    flags.append(input_dim - j - 1)
+                else:
+                    concat_list.extend([single_elem] * times)
+            out = impl.cat.cat(
+                ctx, target, source_ir, f"{name}_cat_{axis}", concat_list, axis
+            )
+            input = out
 
     stride = tuple(
         input.shape[-extend_len + i] // output_size[i] for i in range(extend_len)
@@ -155,6 +190,20 @@ def adaptive_avg_poolNd(
         input.shape[-extend_len + i] - (output_size[i] - 1) * stride[i]
         for i in range(extend_len)
     )
+
+    # Don't have to pool, directly return
+    if all(s == 1 for s in stride) and all(k == 1 for k in kernel_size):
+        if input_rank == 3:  # reshape back to 3D
+            input = impl.shuffle.reshape(
+                ctx,
+                target,
+                source_ir,
+                f"{name}_reshape_back",
+                input,
+                (*input.shape[1:],),
+            )
+        return input
+
     layer = ctx.net.add_pooling_nd(
         input=input, type=trt.PoolingType.AVERAGE, window_size=kernel_size
     )
@@ -163,7 +212,78 @@ def adaptive_avg_poolNd(
 
     output = layer.get_output(0)
 
-    if input_rank == 3:
-        output = impl.shuffle.reshape(ctx, target, source_ir, f"{name}_reshape_back", output, (*output.shape[1:],))
+    # For case 1, we need to split the output and insert the mid of input
+    for axis in insert_axises:
+        positive_axis = get_positive_dim(axis, input_rank)
+        input_dim = input_shape[axis]
+        output_dim = output_size[axis]
+        if input_dim % 2 == 1:
+            mid = impl.select.select(
+                ctx,
+                target,
+                source_ir,
+                f"{name}_select_{axis}",
+                original_input,
+                axis,
+                input_dim // 2,
+            )
+            new_shape = list(mid.shape)
+            new_shape.insert(positive_axis, 1)
+            mid = impl.shuffle.reshape(
+                ctx, target, source_ir, f"{name}_reshape_{axis}", mid, new_shape
+            )
+            split_output = impl.split.split(
+                ctx, target, source_ir, f"{name}_split_{axis}", output, 2, axis
+            )
+            split_output.insert(1, mid)
+            output = impl.cat.cat(
+                ctx, target, source_ir, f"{name}_cat_{axis}", split_output, axis
+            )
+        else:
+            mid1 = impl.select.select(
+                ctx,
+                target,
+                source_ir,
+                f"{name}_select_{axis}",
+                original_input,
+                axis,
+                input_dim // 2 - 1,
+            )
+            new_shape = list(mid1.shape)
+            new_shape.insert(positive_axis, 1)
+            mid1 = impl.shuffle.reshape(
+                ctx, target, source_ir, f"{name}_reshape_{axis}", mid1, new_shape
+            )
+            mid2 = impl.select.select(
+                ctx,
+                target,
+                source_ir,
+                f"{name}_select_{axis}",
+                original_input,
+                axis,
+                input_dim // 2,
+            )
+            mid2 = impl.shuffle.reshape(
+                ctx, target, source_ir, f"{name}_reshape_{axis}", mid2, new_shape
+            )
+            split_output = impl.split.split(
+                ctx,
+                target,
+                source_ir,
+                f"{name}_split_{axis}",
+                output,
+                [output_dim // 2, 1, output_dim // 2],
+                axis,
+            )
+            split_output[1] = mid1
+            split_output.insert(2, mid2)
+            output = impl.cat.cat(
+                ctx, target, source_ir, f"{name}_cat_{axis}", split_output, axis
+            )
+
+    if input_rank == 3:  # reshape back to 3D
+        output = impl.shuffle.reshape(
+            ctx, target, source_ir, f"{name}_reshape_back", output, (*output.shape[1:],)
+        )
 
     return output
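
For context on the upsampling pass this diff introduces: when the requested output size is an integral multiple of the input size along an axis, every bin of adaptive average pooling covers exactly one input element, so the op degenerates to repeating each element along that axis. A minimal sketch checking that property with stock PyTorch ops (plain torch, not the converter itself):

import torch
import torch.nn.functional as F

# Output length 6 is an integral multiple of input length 3, so each
# adaptive-pool bin holds exactly one element and the pooling reduces to
# repeating every element twice along the last axis.
x = torch.randn(1, 3, 3)  # (N, C, L)
expected = F.adaptive_avg_pool1d(x, 6)
assert torch.allclose(expected, torch.repeat_interleave(x, 2, dim=-1))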
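The unchanged stride/kernel_size lines are the usual reduction of adaptive pooling to a fixed pool, which is exact only when the input size divides evenly by the output size; restoring that divisibility is what the repeat_interleave pass is for. A small sketch of the same arithmetic, where fixed_pool_params is a hypothetical helper name, not converter code:

import torch
import torch.nn.functional as F

def fixed_pool_params(in_dim: int, out_dim: int) -> tuple[int, int]:
    # Same formula as the converter: the stride floors the ratio, and the
    # kernel is whatever remains after placing (out_dim - 1) strided windows.
    stride = in_dim // out_dim
    kernel = in_dim - (out_dim - 1) * stride
    return stride, kernel

x = torch.randn(1, 3, 8, 12)
(sh, kh), (sw, kw) = fixed_pool_params(8, 4), fixed_pool_params(12, 6)
fixed = F.avg_pool2d(x, kernel_size=(kh, kw), stride=(sh, sw))
assert torch.allclose(fixed, F.adaptive_avg_pool2d(x, (4, 6)))  # exact when divisible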