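This is a controversial proposal, since it would imply a change to the C-contiguity contract, but it could save memory and some computation. The example below shows the current behaviour: an expanded tensor still shares its 5-element storage, whereas a pointwise operation on it materializes a dense, contiguous result backed by 500 elements.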
# Demo: compare strides and backing-storage size of an expanded tensor
# before and after a pointwise multiplication.
a = torch.ones(5, 1, 1)

# Expand the two size-1 dimensions to 10 x 10.
expanded = a.expand(len(a), 10, 10)
print(expanded.stride())
print(expanded.storage().size())
# Recorded output (the trailing "L" is Python 2 long-integer formatting):
#   (1L, 0L, 0L)   <- zero strides on the expanded dimensions
#   5L             <- storage still holds only the original 5 elements

# Apply a pointwise op to the expanded tensor and inspect the result.
scaled = expanded.mul(5)
print(scaled.stride())
print(scaled.storage().size())
# Recorded output:
#   (100L, 10L, 1L)   <- dense, C-contiguous strides
#   500L              <- result storage holds all 5 * 10 * 10 elements