Skip to content

Commit 42cf2bd

Browse files
committed
Update on "[jit] Better match behavior of loaded ScriptModules vs. freshly created ones"
IR emitter uses `ModuleValue` to represent ScriptModules and emit IR for attribute access, submodule access, etc. `ModuleValue` relies on two pieces of information, the JIT type of the module, and the `ConcreteModuleType`, which encapsulates Python-only information about the module. ScriptModules loaded from a package used to create a dummy ConcreteModuleType without any info in it. This led to divergences in behavior during compilation. This PR makes the two ways of constructing a ConcreteModuleType equivalent, modulo any py-only information (which, by definition, is never present in packaged files anyway). Differential Revision: [D23228738](https://our.internmc.facebook.com/intern/diff/D23228738) [ghstack-poisoned]
2 parents af3d2e5 + 653f684 commit 42cf2bd

File tree

108 files changed

+5550
-3820
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

108 files changed

+5550
-3820
lines changed

.circleci/cimodel/data/pytorch_build_definitions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ def gen_dependent_configs(xenial_parent_config):
191191
restrict_phases=["test"],
192192
gpu_resource=gpu,
193193
parent_build=xenial_parent_config,
194-
is_important=xenial_parent_config.is_important,
194+
is_important=False,
195195
)
196196

197197
configs.append(c)
@@ -353,7 +353,7 @@ def instantiate_configs():
353353
):
354354
c.dependent_tests = gen_docs_configs(c)
355355

356-
if cuda_version == "10.1" and python_version == "3.6" and not is_libtorch:
356+
if cuda_version == "10.2" and python_version == "3.6" and not is_libtorch:
357357
c.dependent_tests = gen_dependent_configs(c)
358358

359359
if (

.circleci/config.yml

Lines changed: 29 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,35 +1011,6 @@ jobs:
10111011
<<: *binary_checkout
10121012
- run:
10131013
<<: *binary_populate_env
1014-
- run:
1015-
name: Install unbuffer and ts
1016-
command: |
1017-
set -eux -o pipefail
1018-
source /env
1019-
OS_NAME=`awk -F= '/^NAME/{print $2}' /etc/os-release`
1020-
if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
1021-
retry yum -q -y install epel-release
1022-
retry yum -q -y install expect moreutils
1023-
elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
1024-
retry apt-get update
1025-
retry apt-get -y install expect moreutils
1026-
retry conda install -y -c eumetsat expect
1027-
retry conda install -y cmake
1028-
fi
1029-
- run:
1030-
name: Update compiler to devtoolset7
1031-
command: |
1032-
set -eux -o pipefail
1033-
source /env
1034-
if [[ "$DESIRED_DEVTOOLSET" == 'devtoolset7' ]]; then
1035-
source "/builder/update_compiler.sh"
1036-
1037-
# Env variables are not persisted into the next step
1038-
echo "export PATH=$PATH" >> /env
1039-
echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >> /env
1040-
else
1041-
echo "Not updating compiler"
1042-
fi
10431014
- run:
10441015
name: Build
10451016
no_output_timeout: "1h"
@@ -1059,7 +1030,6 @@ jobs:
10591030
python3 -mpip install requests && \
10601031
SCRIBE_GRAPHQL_ACCESS_TOKEN=${SCRIBE_GRAPHQL_ACCESS_TOKEN} \
10611032
python3 /pytorch/.circleci/scripts/upload_binary_size_to_scuba.py || exit 0
1062-
10631033
- persist_to_workspace:
10641034
root: /
10651035
paths: final_pkgs
@@ -6326,71 +6296,71 @@ workflows:
63266296
- /release\/.*/
63276297
build_environment: "pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7-build"
63286298
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7"
6299+
- pytorch_linux_build:
6300+
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
6301+
requires:
6302+
- "docker-pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
6303+
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-build"
6304+
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
6305+
- pytorch_linux_test:
6306+
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_test
6307+
requires:
6308+
- pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
6309+
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-test"
6310+
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
6311+
use_cuda_docker_runtime: "1"
6312+
resource_class: gpu.medium
63296313
- pytorch_linux_test:
6330-
name: pytorch_linux_xenial_cuda10_1_cudnn7_py3_multigpu_test
6314+
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_multigpu_test
63316315
requires:
6332-
- pytorch_linux_xenial_cuda10_1_cudnn7_py3_gcc7_build
6316+
- pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
63336317
filters:
63346318
branches:
63356319
only:
63366320
- master
63376321
- /ci-all\/.*/
63386322
- /release\/.*/
6339-
build_environment: "pytorch-linux-xenial-cuda10.1-cudnn7-py3-multigpu-test"
6340-
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7"
6323+
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-multigpu-test"
6324+
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
63416325
use_cuda_docker_runtime: "1"
63426326
resource_class: gpu.large
63436327
- pytorch_linux_test:
6344-
name: pytorch_linux_xenial_cuda10_1_cudnn7_py3_nogpu_NO_AVX2_test
6328+
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_nogpu_NO_AVX2_test
63456329
requires:
6346-
- pytorch_linux_xenial_cuda10_1_cudnn7_py3_gcc7_build
6330+
- pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
63476331
filters:
63486332
branches:
63496333
only:
63506334
- master
63516335
- /ci-all\/.*/
63526336
- /release\/.*/
6353-
build_environment: "pytorch-linux-xenial-cuda10.1-cudnn7-py3-nogpu-NO_AVX2-test"
6354-
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7"
6337+
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-nogpu-NO_AVX2-test"
6338+
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
63556339
resource_class: large
63566340
- pytorch_linux_test:
6357-
name: pytorch_linux_xenial_cuda10_1_cudnn7_py3_nogpu_NO_AVX_test
6341+
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_nogpu_NO_AVX_test
63586342
requires:
6359-
- pytorch_linux_xenial_cuda10_1_cudnn7_py3_gcc7_build
6343+
- pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
63606344
filters:
63616345
branches:
63626346
only:
63636347
- master
63646348
- /ci-all\/.*/
63656349
- /release\/.*/
6366-
build_environment: "pytorch-linux-xenial-cuda10.1-cudnn7-py3-nogpu-NO_AVX-test"
6367-
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7"
6350+
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-nogpu-NO_AVX-test"
6351+
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
63686352
resource_class: large
63696353
- pytorch_linux_test:
6370-
name: pytorch_linux_xenial_cuda10_1_cudnn7_py3_slow_test
6354+
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_slow_test
63716355
requires:
6372-
- pytorch_linux_xenial_cuda10_1_cudnn7_py3_gcc7_build
6356+
- pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
63736357
filters:
63746358
branches:
63756359
only:
63766360
- master
63776361
- /ci-all\/.*/
63786362
- /release\/.*/
6379-
build_environment: "pytorch-linux-xenial-cuda10.1-cudnn7-py3-slow-test"
6380-
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7"
6381-
use_cuda_docker_runtime: "1"
6382-
resource_class: gpu.medium
6383-
- pytorch_linux_build:
6384-
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
6385-
requires:
6386-
- "docker-pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
6387-
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-build"
6388-
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
6389-
- pytorch_linux_test:
6390-
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_test
6391-
requires:
6392-
- pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
6393-
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-test"
6363+
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-slow-test"
63946364
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
63956365
use_cuda_docker_runtime: "1"
63966366
resource_class: gpu.medium

.circleci/scripts/binary_linux_build.sh

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ set -eux -o pipefail
55
source /env
66

77
# Defaults here so they can be changed in one place
8-
export MAX_JOBS=12
8+
export MAX_JOBS=${MAX_JOBS:-$(nproc --ignore=1)}
99

1010
# Parse the parameters
1111
if [[ "$PACKAGE_TYPE" == 'conda' ]]; then
@@ -16,15 +16,5 @@ else
1616
build_script='manywheel/build.sh'
1717
fi
1818

19-
# We want to call unbuffer, which calls tclsh which finds the expect
20-
# package. The expect was installed by yum into /usr/bin so we want to
21-
# find /usr/bin/tclsh, but this is shadowed by /opt/conda/bin/tclsh in
22-
# the conda docker images, so we prepend it to the path here.
23-
if [[ "$PACKAGE_TYPE" == 'conda' ]]; then
24-
mkdir /just_tclsh_bin
25-
ln -s /usr/bin/tclsh /just_tclsh_bin/tclsh
26-
export PATH=/just_tclsh_bin:$PATH
27-
fi
28-
2919
# Build the package
30-
SKIP_ALL_TESTS=1 unbuffer "/builder/$build_script" | ts
20+
SKIP_ALL_TESTS=1 stdbuf -i0 -o0 -e0 "/builder/$build_script"

.circleci/verbatim-sources/job-specs/binary-job-specs.yml

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -7,35 +7,6 @@
77
<<: *binary_checkout
88
- run:
99
<<: *binary_populate_env
10-
- run:
11-
name: Install unbuffer and ts
12-
command: |
13-
set -eux -o pipefail
14-
source /env
15-
OS_NAME=`awk -F= '/^NAME/{print $2}' /etc/os-release`
16-
if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
17-
retry yum -q -y install epel-release
18-
retry yum -q -y install expect moreutils
19-
elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
20-
retry apt-get update
21-
retry apt-get -y install expect moreutils
22-
retry conda install -y -c eumetsat expect
23-
retry conda install -y cmake
24-
fi
25-
- run:
26-
name: Update compiler to devtoolset7
27-
command: |
28-
set -eux -o pipefail
29-
source /env
30-
if [[ "$DESIRED_DEVTOOLSET" == 'devtoolset7' ]]; then
31-
source "/builder/update_compiler.sh"
32-
33-
# Env variables are not persisted into the next step
34-
echo "export PATH=$PATH" >> /env
35-
echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >> /env
36-
else
37-
echo "Not updating compiler"
38-
fi
3910
- run:
4011
name: Build
4112
no_output_timeout: "1h"
@@ -55,7 +26,6 @@
5526
python3 -mpip install requests && \
5627
SCRIBE_GRAPHQL_ACCESS_TOKEN=${SCRIBE_GRAPHQL_ACCESS_TOKEN} \
5728
python3 /pytorch/.circleci/scripts/upload_binary_size_to_scuba.py || exit 0
58-
5929
- persist_to_workspace:
6030
root: /
6131
paths: final_pkgs

.github/workflows/lint.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,8 @@ jobs:
144144
# Run Clang-Tidy
145145
# The negative filters below are to exclude files that include onnx_pb.h or
146146
# caffe2_pb.h, otherwise we'd have to build protos as part of this CI job.
147+
# FunctionsManual.cpp is excluded to keep this diff clean. It will be fixed
148+
# in a follow up PR.
147149
python tools/clang_tidy.py \
148150
--verbose \
149151
--paths torch/csrc/ \
@@ -157,6 +159,7 @@ jobs:
157159
-g"-torch/csrc/onnx/init.cpp" \
158160
-g"-torch/csrc/cuda/nccl.*" \
159161
-g"-torch/csrc/cuda/python_nccl.cpp" \
162+
-g"-torch/csrc/autograd/FunctionsManual.cpp" \
160163
"$@" > ${GITHUB_WORKSPACE}/clang-tidy-output.txt
161164
162165
cat ${GITHUB_WORKSPACE}/clang-tidy-output.txt

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ torch/nn/functional.pyi
5757
torch/csrc/autograd/generated/*
5858
# Listed manually because some files in this directory are not generated
5959
torch/testing/_internal/generated/annotated_fn_args.py
60+
torch/testing/_internal/data/*.pt
6061
torch/csrc/cudnn/cuDNN.cpp
6162
torch/csrc/generated
6263
torch/csrc/generic/TensorMethods.cpp

aten/src/ATen/core/aten_interned_strings.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,8 @@ _(aten, clamp_min) \
238238
_(aten, clone) \
239239
_(aten, coalesce) \
240240
_(aten, combinations) \
241+
_(aten, _conj) \
242+
_(aten, conj) \
241243
_(aten, complex) \
242244
_(aten, polar) \
243245
_(aten, constant_pad_nd) \

aten/src/ATen/cuda/CUDABlas.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,20 @@ void dot<at::Half>(CUDABLAS_DOT_ARGTYPES(at::Half)) {
533533
#endif
534534
}
535535

536+
template <>
537+
void vdot<c10::complex<float>>(CUDABLAS_DOT_ARGTYPES(c10::complex<float>)) {
538+
TORCH_CUDABLAS_CHECK(cublasCdotc(handle, n, reinterpret_cast<const cuComplex*>(x),
539+
incx, reinterpret_cast<const cuComplex*>(y), incy,
540+
reinterpret_cast<cuComplex*>(result)));
541+
}
542+
543+
template <>
544+
void vdot<c10::complex<double>>(CUDABLAS_DOT_ARGTYPES(c10::complex<double>)) {
545+
TORCH_CUDABLAS_CHECK(cublasZdotc(handle, n, reinterpret_cast<const cuDoubleComplex*>(x),
546+
incx, reinterpret_cast<const cuDoubleComplex*>(y), incy,
547+
reinterpret_cast<cuDoubleComplex*>(result)));
548+
}
549+
536550
} // namespace blas
537551
} // namespace cuda
538552
} // namespace at

aten/src/ATen/cuda/CUDABlas.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,16 @@ void dot<c10::complex<double>>(CUDABLAS_DOT_ARGTYPES(c10::complex<double>));
131131
template <>
132132
void dot<c10::complex<float>>(CUDABLAS_DOT_ARGTYPES(c10::complex<float>));
133133

134+
template <typename Dtype>
135+
inline void vdot(CUDABLAS_DOT_ARGTYPES(Dtype)) {
136+
AT_ERROR("at::cuda::blas::vdot: not implemented for ", typeid(Dtype).name());
137+
}
138+
139+
template <>
140+
void vdot<c10::complex<float>>(CUDABLAS_DOT_ARGTYPES(c10::complex<float>));
141+
template <>
142+
void vdot<c10::complex<double>>(CUDABLAS_DOT_ARGTYPES(c10::complex<double>));
143+
134144
} // namespace blas
135145
} // namespace cuda
136146
} // namespace at

aten/src/ATen/native/Blas.cpp

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ void gemv(char trans, int64_t m, int64_t n, scalar_t alpha, scalar_t *a, int64_t
1111
template<typename scalar_t>
1212
scalar_t dot_impl(int64_t n, scalar_t *x, int64_t incx, scalar_t *y, int64_t incy);
1313

14+
template<typename scalar_t>
15+
scalar_t vdot_impl(int64_t n, scalar_t *x, int64_t incx, scalar_t *y, int64_t incy);
16+
1417
constexpr inline bool lda_cond(int64_t m, int64_t n, int64_t lda) {
1518
return n == 1 || lda > std::max<int64_t>(1L, m);
1619
}
@@ -93,9 +96,7 @@ Tensor mv(const Tensor &self, const Tensor &vec) {
9396
return native::mv_out(result, self, vec);
9497
}
9598

96-
Tensor dot(const Tensor &self, const Tensor &other){
97-
at::NoNamesGuard guard;
98-
99+
inline void dot_check(const Tensor& self, const Tensor& other) {
99100
TORCH_CHECK(
100101
self.dim() == 1 && other.dim() == 1,
101102
"1D tensors expected, but got ",
@@ -116,10 +117,18 @@ Tensor dot(const Tensor &self, const Tensor &other){
116117
"inconsistent tensor size, expected tensor [",
117118
self.numel(),
118119
"] and src [",
119-
other.numel(), "] to have the same number of elements, but got ",
120-
self.numel(), " and ",
120+
other.numel(),
121+
"] to have the same number of elements, but got ",
122+
self.numel(),
123+
" and ",
121124
other.numel(),
122125
" elements respectively");
126+
}
127+
128+
Tensor dot(const Tensor &self, const Tensor &other){
129+
at::NoNamesGuard guard;
130+
131+
dot_check(self, other);
123132

124133
return AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND(at::ScalarType::Half, self.scalar_type(), "dot", [&] {
125134
Tensor result = at::empty({}, self.options());
@@ -128,4 +137,22 @@ Tensor dot(const Tensor &self, const Tensor &other){
128137
});
129138
}
130139

140+
Tensor vdot(const Tensor &self, const Tensor &other){
141+
at::NoNamesGuard guard;
142+
143+
// Dispatch to `dot` for real dtypes.
144+
if (!self.is_complex()){
145+
return at::dot(self, other);
146+
}
147+
148+
// For complex dtypes.
149+
dot_check(self, other);
150+
return AT_DISPATCH_COMPLEX_TYPES(self.scalar_type(), "vdot", [&] {
151+
Tensor result = at::empty({}, self.options());
152+
result.fill_(vdot_impl<scalar_t>(self.numel(), self.data_ptr<scalar_t>(), self.stride(0), other.data_ptr<scalar_t>(), other.stride(0)));
153+
return result;
154+
});
155+
156+
}
157+
131158
}} // namespace at::native

0 commit comments

Comments (0)