Skip to content

Commit 42cf2bd

Browse files
committed
Update on "[jit] Better match behavior of loaded ScriptModules vs. freshly created ones"
IR emitter uses `ModuleValue` to represent ScriptModules and emit IR for attribute access, submodule access, etc. `ModuleValue` relies on two pieces of information, the JIT type of the module, and the `ConcreteModuleType`, which encapsulates Python-only information about the module. ScriptModules loaded from a package used to create a dummy ConcreteModuleType without any info in it. This led to divergences in behavior during compilation. This PR makes the two ways of constructing a ConcreteModuleType equivalent, modulo any py-only information (which, by definition, is never present in packaged files anyway). Differential Revision: [D23228738](https://our.internmc.facebook.com/intern/diff/D23228738) [ghstack-poisoned]
2 parents af3d2e5 + 653f684 commit 42cf2bd

File tree

108 files changed

+5550
-3820
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

108 files changed

+5550
-3820
lines changed

.circleci/cimodel/data/pytorch_build_definitions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ def gen_dependent_configs(xenial_parent_config):
191191
restrict_phases=["test"],
192192
gpu_resource=gpu,
193193
parent_build=xenial_parent_config,
194-
is_important=xenial_parent_config.is_important,
194+
is_important=False,
195195
)
196196

197197
configs.append(c)
@@ -353,7 +353,7 @@ def instantiate_configs():
353353
):
354354
c.dependent_tests = gen_docs_configs(c)
355355

356-
if cuda_version == "10.1" and python_version == "3.6" and not is_libtorch:
356+
if cuda_version == "10.2" and python_version == "3.6" and not is_libtorch:
357357
c.dependent_tests = gen_dependent_configs(c)
358358

359359
if (

.circleci/config.yml

Lines changed: 29 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,35 +1011,6 @@ jobs:
10111011
<<: *binary_checkout
10121012
- run:
10131013
<<: *binary_populate_env
1014-
- run:
1015-
name: Install unbuffer and ts
1016-
command: |
1017-
set -eux -o pipefail
1018-
source /env
1019-
OS_NAME=`awk -F= '/^NAME/{print $2}' /etc/os-release`
1020-
if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
1021-
retry yum -q -y install epel-release
1022-
retry yum -q -y install expect moreutils
1023-
elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
1024-
retry apt-get update
1025-
retry apt-get -y install expect moreutils
1026-
retry conda install -y -c eumetsat expect
1027-
retry conda install -y cmake
1028-
fi
1029-
- run:
1030-
name: Update compiler to devtoolset7
1031-
command: |
1032-
set -eux -o pipefail
1033-
source /env
1034-
if [[ "$DESIRED_DEVTOOLSET" == 'devtoolset7' ]]; then
1035-
source "/builder/update_compiler.sh"
1036-
1037-
# Env variables are not persisted into the next step
1038-
echo "export PATH=$PATH" >> /env
1039-
echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >> /env
1040-
else
1041-
echo "Not updating compiler"
1042-
fi
10431014
- run:
10441015
name: Build
10451016
no_output_timeout: "1h"
@@ -1059,7 +1030,6 @@ jobs:
10591030
python3 -mpip install requests && \
10601031
SCRIBE_GRAPHQL_ACCESS_TOKEN=${SCRIBE_GRAPHQL_ACCESS_TOKEN} \
10611032
python3 /pytorch/.circleci/scripts/upload_binary_size_to_scuba.py || exit 0
1062-
10631033
- persist_to_workspace:
10641034
root: /
10651035
paths: final_pkgs
@@ -6326,71 +6296,71 @@ workflows:
63266296
- /release\/.*/
63276297
build_environment: "pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7-build"
63286298
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7"
6299+
- pytorch_linux_build:
6300+
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
6301+
requires:
6302+
- "docker-pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
6303+
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-build"
6304+
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
6305+
- pytorch_linux_test:
6306+
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_test
6307+
requires:
6308+
- pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
6309+
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-test"
6310+
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
6311+
use_cuda_docker_runtime: "1"
6312+
resource_class: gpu.medium
63296313
- pytorch_linux_test:
6330-
name: pytorch_linux_xenial_cuda10_1_cudnn7_py3_multigpu_test
6314+
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_multigpu_test
63316315
requires:
6332-
- pytorch_linux_xenial_cuda10_1_cudnn7_py3_gcc7_build
6316+
- pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
63336317
filters:
63346318
branches:
63356319
only:
63366320
- master
63376321
- /ci-all\/.*/
63386322
- /release\/.*/
6339-
build_environment: "pytorch-linux-xenial-cuda10.1-cudnn7-py3-multigpu-test"
6340-
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7"
6323+
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-multigpu-test"
6324+
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
63416325
use_cuda_docker_runtime: "1"
63426326
resource_class: gpu.large
63436327
- pytorch_linux_test:
6344-
name: pytorch_linux_xenial_cuda10_1_cudnn7_py3_nogpu_NO_AVX2_test
6328+
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_nogpu_NO_AVX2_test
63456329
requires:
6346-
- pytorch_linux_xenial_cuda10_1_cudnn7_py3_gcc7_build
6330+
- pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
63476331
filters:
63486332
branches:
63496333
only:
63506334
- master
63516335
- /ci-all\/.*/
63526336
- /release\/.*/
6353-
build_environment: "pytorch-linux-xenial-cuda10.1-cudnn7-py3-nogpu-NO_AVX2-test"
6354-
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7"
6337+
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-nogpu-NO_AVX2-test"
6338+
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
63556339
resource_class: large
63566340
- pytorch_linux_test:
6357-
name: pytorch_linux_xenial_cuda10_1_cudnn7_py3_nogpu_NO_AVX_test
6341+
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_nogpu_NO_AVX_test
63586342
requires:
6359-
- pytorch_linux_xenial_cuda10_1_cudnn7_py3_gcc7_build
6343+
- pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
63606344
filters:
63616345
branches:
63626346
only:
63636347
- master
63646348
- /ci-all\/.*/
63656349
- /release\/.*/
6366-
build_environment: "pytorch-linux-xenial-cuda10.1-cudnn7-py3-nogpu-NO_AVX-test"
6367-
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7"
6350+
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-nogpu-NO_AVX-test"
6351+
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
63686352
resource_class: large
63696353
- pytorch_linux_test:
6370-
name: pytorch_linux_xenial_cuda10_1_cudnn7_py3_slow_test
6354+
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_slow_test
63716355
requires:
6372-
- pytorch_linux_xenial_cuda10_1_cudnn7_py3_gcc7_build
6356+
- pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
63736357
filters:
63746358
branches:
63756359
only:
63766360
- master
63776361
- /ci-all\/.*/
63786362
- /release\/.*/
6379-
build_environment: "pytorch-linux-xenial-cuda10.1-cudnn7-py3-slow-test"
6380-
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7"
6381-
use_cuda_docker_runtime: "1"
6382-
resource_class: gpu.medium
6383-
- pytorch_linux_build:
6384-
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
6385-
requires:
6386-
- "docker-pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
6387-
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-build"
6388-
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
6389-
- pytorch_linux_test:
6390-
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_test
6391-
requires:
6392-
- pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
6393-
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-test"
6363+
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-slow-test"
63946364
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
63956365
use_cuda_docker_runtime: "1"
63966366
resource_class: gpu.medium

.circleci/scripts/binary_linux_build.sh

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ set -eux -o pipefail
55
source /env
66

77
# Defaults here so they can be changed in one place
8-
export MAX_JOBS=12
8+
export MAX_JOBS=${MAX_JOBS:-$(nproc --ignore=1)}
99

1010
# Parse the parameters
1111
if [[ "$PACKAGE_TYPE" == 'conda' ]]; then
@@ -16,15 +16,5 @@ else
1616
build_script='manywheel/build.sh'
1717
fi
1818

19-
# We want to call unbuffer, which calls tclsh which finds the expect
20-
# package. The expect was installed by yum into /usr/bin so we want to
21-
# find /usr/bin/tclsh, but this is shadowed by /opt/conda/bin/tclsh in
22-
# the conda docker images, so we prepend it to the path here.
23-
if [[ "$PACKAGE_TYPE" == 'conda' ]]; then
24-
mkdir /just_tclsh_bin
25-
ln -s /usr/bin/tclsh /just_tclsh_bin/tclsh
26-
export PATH=/just_tclsh_bin:$PATH
27-
fi
28-
2919
# Build the package
30-
SKIP_ALL_TESTS=1 unbuffer "/builder/$build_script" | ts
20+
SKIP_ALL_TESTS=1 stdbuf -i0 -o0 -e0 "/builder/$build_script"

.circleci/verbatim-sources/job-specs/binary-job-specs.yml

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -7,35 +7,6 @@
77
<<: *binary_checkout
88
- run:
99
<<: *binary_populate_env
10-
- run:
11-
name: Install unbuffer and ts
12-
command: |
13-
set -eux -o pipefail
14-
source /env
15-
OS_NAME=`awk -F= '/^NAME/{print $2}' /etc/os-release`
16-
if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
17-
retry yum -q -y install epel-release
18-
retry yum -q -y install expect moreutils
19-
elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
20-
retry apt-get update
21-
retry apt-get -y install expect moreutils
22-
retry conda install -y -c eumetsat expect
23-
retry conda install -y cmake
24-
fi
25-
- run:
26-
name: Update compiler to devtoolset7
27-
command: |
28-
set -eux -o pipefail
29-
source /env
30-
if [[ "$DESIRED_DEVTOOLSET" == 'devtoolset7' ]]; then
31-
source "/builder/update_compiler.sh"
32-
33-
# Env variables are not persisted into the next step
34-
echo "export PATH=$PATH" >> /env
35-
echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >> /env
36-
else
37-
echo "Not updating compiler"
38-
fi
3910
- run:
4011
name: Build
4112
no_output_timeout: "1h"
@@ -55,7 +26,6 @@
5526
python3 -mpip install requests && \
5627
SCRIBE_GRAPHQL_ACCESS_TOKEN=${SCRIBE_GRAPHQL_ACCESS_TOKEN} \
5728
python3 /pytorch/.circleci/scripts/upload_binary_size_to_scuba.py || exit 0
58-
5929
- persist_to_workspace:
6030
root: /
6131
paths: final_pkgs

.github/workflows/lint.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,8 @@ jobs:
144144
# Run Clang-Tidy
145145
# The negative filters below are to exclude files that include onnx_pb.h or
146146
# caffe2_pb.h, otherwise we'd have to build protos as part of this CI job.
147+
# FunctionsManual.cpp is excluded to keep this diff clean. It will be fixed
148+
# in a follow up PR.
147149
python tools/clang_tidy.py \
148150
--verbose \
149151
--paths torch/csrc/ \
@@ -157,6 +159,7 @@ jobs:
157159
-g"-torch/csrc/onnx/init.cpp" \
158160
-g"-torch/csrc/cuda/nccl.*" \
159161
-g"-torch/csrc/cuda/python_nccl.cpp" \
162+
-g"-torch/csrc/autograd/FunctionsManual.cpp" \
160163
"$@" > ${GITHUB_WORKSPACE}/clang-tidy-output.txt
161164
162165
cat ${GITHUB_WORKSPACE}/clang-tidy-output.txt

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ torch/nn/functional.pyi
5757
torch/csrc/autograd/generated/*
5858
# Listed manually because some files in this directory are not generated
5959
torch/testing/_internal/generated/annotated_fn_args.py
60+
torch/testing/_internal/data/*.pt
6061
torch/csrc/cudnn/cuDNN.cpp
6162
torch/csrc/generated
6263
torch/csrc/generic/TensorMethods.cpp

aten/src/ATen/core/aten_interned_strings.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,8 @@ _(aten, clamp_min) \
238238
_(aten, clone) \
239239
_(aten, coalesce) \
240240
_(aten, combinations) \
241+
_(aten, _conj) \
242+
_(aten, conj) \
241243
_(aten, complex) \
242244
_(aten, polar) \
243245
_(aten, constant_pad_nd) \

aten/src/ATen/cuda/CUDABlas.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,20 @@ void dot<at::Half>(CUDABLAS_DOT_ARGTYPES(at::Half)) {
533533
#endif
534534
}
535535

536+
template <>
537+
void vdot<c10::complex<float>>(CUDABLAS_DOT_ARGTYPES(c10::complex<float>)) {
538+
TORCH_CUDABLAS_CHECK(cublasCdotc(handle, n, reinterpret_cast<const cuComplex*>(x),
539+
incx, reinterpret_cast<const cuComplex*>(y), incy,
540+
reinterpret_cast<cuComplex*>(result)));
541+
}
542+
543+
template <>
544+
void vdot<c10::complex<double>>(CUDABLAS_DOT_ARGTYPES(c10::complex<double>)) {
545+
TORCH_CUDABLAS_CHECK(cublasZdotc(handle, n, reinterpret_cast<const cuDoubleComplex*>(x),
546+
incx, reinterpret_cast<const cuDoubleComplex*>(y), incy,
547+
reinterpret_cast<cuDoubleComplex*>(result)));
548+
}
549+
536550
} // namespace blas
537551
} // namespace cuda
538552
} // namespace at

aten/src/ATen/cuda/CUDABlas.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,16 @@ void dot<c10::complex<double>>(CUDABLAS_DOT_ARGTYPES(c10::complex<double>));
131131
template <>
132132
void dot<c10::complex<float>>(CUDABLAS_DOT_ARGTYPES(c10::complex<float>));
133133

134+
template <typename Dtype>
135+
inline void vdot(CUDABLAS_DOT_ARGTYPES(Dtype)) {
136+
AT_ERROR("at::cuda::blas::vdot: not implemented for ", typeid(Dtype).name());
137+
}
138+
139+
template <>
140+
void vdot<c10::complex<float>>(CUDABLAS_DOT_ARGTYPES(c10::complex<float>));
141+
template <>
142+
void vdot<c10::complex<double>>(CUDABLAS_DOT_ARGTYPES(c10::complex<double>));
143+
134144
} // namespace blas
135145
} // namespace cuda
136146
} // namespace at

aten/src/ATen/native/Blas.cpp

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ void gemv(char trans, int64_t m, int64_t n, scalar_t alpha, scalar_t *a, int64_t
1111
template<typename scalar_t>
1212
scalar_t dot_impl(int64_t n, scalar_t *x, int64_t incx, scalar_t *y, int64_t incy);
1313

14+
template<typename scalar_t>
15+
scalar_t vdot_impl(int64_t n, scalar_t *x, int64_t incx, scalar_t *y, int64_t incy);
16+
1417
constexpr inline bool lda_cond(int64_t m, int64_t n, int64_t lda) {
1518
return n == 1 || lda > std::max<int64_t>(1L, m);
1619
}
@@ -93,9 +96,7 @@ Tensor mv(const Tensor &self, const Tensor &vec) {
9396
return native::mv_out(result, self, vec);
9497
}
9598

96-
Tensor dot(const Tensor &self, const Tensor &other){
97-
at::NoNamesGuard guard;
98-
99+
inline void dot_check(const Tensor& self, const Tensor& other) {
99100
TORCH_CHECK(
100101
self.dim() == 1 && other.dim() == 1,
101102
"1D tensors expected, but got ",
@@ -116,10 +117,18 @@ Tensor dot(const Tensor &self, const Tensor &other){
116117
"inconsistent tensor size, expected tensor [",
117118
self.numel(),
118119
"] and src [",
119-
other.numel(), "] to have the same number of elements, but got ",
120-
self.numel(), " and ",
120+
other.numel(),
121+
"] to have the same number of elements, but got ",
122+
self.numel(),
123+
" and ",
121124
other.numel(),
122125
" elements respectively");
126+
}
127+
128+
Tensor dot(const Tensor &self, const Tensor &other){
129+
at::NoNamesGuard guard;
130+
131+
dot_check(self, other);
123132

124133
return AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND(at::ScalarType::Half, self.scalar_type(), "dot", [&] {
125134
Tensor result = at::empty({}, self.options());
@@ -128,4 +137,22 @@ Tensor dot(const Tensor &self, const Tensor &other){
128137
});
129138
}
130139

140+
Tensor vdot(const Tensor &self, const Tensor &other){
141+
at::NoNamesGuard guard;
142+
143+
// Dispatch to `dot` for real dtypes.
144+
if (!self.is_complex()){
145+
return at::dot(self, other);
146+
}
147+
148+
// For complex dtypes.
149+
dot_check(self, other);
150+
return AT_DISPATCH_COMPLEX_TYPES(self.scalar_type(), "vdot", [&] {
151+
Tensor result = at::empty({}, self.options());
152+
result.fill_(vdot_impl<scalar_t>(self.numel(), self.data_ptr<scalar_t>(), self.stride(0), other.data_ptr<scalar_t>(), other.stride(0)));
153+
return result;
154+
});
155+
156+
}
157+
131158
}} // namespace at::native

0 commit comments

Comments (0)