
Commit 650c10f

Author: Peter Yeh
Merge remote-tracking branch 'upstream/master' into caffe2_ops_miopen
* upstream/master: (47 commits)
  use THCThrustAllocator in BCECriterion (pytorch#8188)
  Add more annotations for arguments in ATen schema (pytorch#8192)
  fix lint
  Fix scalar check for sparse tensors. (pytorch#8197)
  [Caffe2] Merging setup.py with setup_caffe2.py (pytorch#8129)
  [auto] Update onnx to 4e65fd8 - fuse consecutive squeezes (onnx/onnx#1078) onnx/onnx@4e65fd8
  Add a loop unrolling pass to PyTorch JIT (pytorch#7672)
  Fix protobuf options (pytorch#8184)
  Yangqing as an ONNX codeowner (pytorch#8185)
  [ONNX] Fix type_as symbolic (pytorch#8183)
  fix caffe2 docker build (pytorch#7411)
  Add retry logic to sccache download for Windows build (pytorch#7697)
  Better conv error message basing on weight shape (pytorch#8051)
  Fix a corner case for ReShapeOp (pytorch#8178)
  Get rid of SOVERSION (again). (pytorch#8132)
  Add back onnx console scripts dropped during migration from onnx-caffe2 (pytorch#8143)
  use the correct datatype format (pytorch#8144)
  [JIT] Support a single TensorList argument anywhere in the argument list + index_put (pytorch#8173)
  Export getCudnnHandle (pytorch#7726)
  [Caffe2] Update elementwise ops to support numpy style boradcast (pytorch#8070)
  ...
2 parents 8e5fe41 + e6044e5

File tree: 235 files changed, +8588 -4202 lines


.jenkins/caffe2/build.sh

Lines changed: 86 additions & 61 deletions
@@ -2,9 +2,15 @@
 
 set -ex
 
+# The INSTALL_PREFIX here must match up with test.sh
+INSTALL_PREFIX="/usr/local/caffe2"
 LOCAL_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 ROOT_DIR=$(cd "$LOCAL_DIR"/../.. && pwd)
+CMAKE_ARGS=()
+
 
+# Setup SCCACHE
+###############################################################################
 # Setup sccache if SCCACHE_BUCKET is set
 if [ -n "${SCCACHE_BUCKET}" ]; then
   mkdir -p ./sccache
@@ -61,24 +67,29 @@ report_compile_cache_stats() {
   fi
 }
 
-CMAKE_ARGS=("-DBUILD_BINARY=ON")
-CMAKE_ARGS+=("-DUSE_OBSERVERS=ON")
-CMAKE_ARGS+=("-DUSE_ZSTD=ON")
 
-if [[ $BUILD_ENVIRONMENT == *-aten-* ]]; then
-  if [[ CMAKE_ARGS != *USE_ATEN* ]] && [[ CMAKE_ARGS != *BUILD_ATEN* ]]; then
-    CMAKE_ARGS+=("-DBUILD_ATEN=ON")
-  fi
+###############################################################################
+# Explicitly set Python executable.
+###############################################################################
+# On Ubuntu 16.04 the default Python is still 2.7.
+PYTHON="$(which python)"
+if [[ "${BUILD_ENVIRONMENT}" =~ py((2|3)\.?[0-9]?\.?[0-9]?) ]]; then
+  PYTHON=$(which "python${BASH_REMATCH[1]}")
+  CMAKE_ARGS+=("-DPYTHON_EXECUTABLE=${PYTHON}")
 fi
 
-# Run build script from scripts if applicable
+
+###############################################################################
+# Use special scripts for Android, conda, and setup builds
+###############################################################################
 if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then
   export ANDROID_NDK=/opt/ndk
+  CMAKE_ARGS+=("-DBUILD_BINARY=ON")
+  CMAKE_ARGS+=("-DUSE_OBSERVERS=ON")
+  CMAKE_ARGS+=("-DUSE_ZSTD=ON")
   "${ROOT_DIR}/scripts/build_android.sh" ${CMAKE_ARGS[*]} "$@"
   exit 0
-fi
-if [[ "${BUILD_ENVIRONMENT}" == conda* ]]; then
-
+elif [[ "${BUILD_ENVIRONMENT}" == conda* ]]; then
   # click (required by onnx) wants these set
   # TODO don't think this fixes the problem for conda3 yet
   export LANG=C.UTF-8
@@ -96,51 +107,50 @@ if [[ "${BUILD_ENVIRONMENT}" == conda* ]]; then
   PROTOBUF_INCDIR=/opt/conda/include pip install -b /tmp/pip_install_onnx "file://${ROOT_DIR}/third_party/onnx#egg=onnx"
   report_compile_cache_stats
   exit 0
+elif [[ $BUILD_ENVIRONMENT == *setup* ]]; then
+  rm -rf $INSTALL_PREFIX && mkdir $INSTALL_PREFIX
+  PYTHONPATH=$INSTALL_PREFIX $PYTHON setup_caffe2.py develop --install-dir $INSTALL_PREFIX
+  exit 0
 fi
 
-# Run cmake from ./build_caffe2 directory so it doesn't conflict with
-# standard PyTorch build directory. Eventually these won't need to
-# be separate.
-rm -rf build_caffe2
-mkdir build_caffe2
-cd ./build_caffe2
 
-INSTALL_PREFIX="/usr/local/caffe2"
+###############################################################################
+# Set cmake args
+###############################################################################
+CMAKE_ARGS+=("-DBUILD_BINARY=ON")
+CMAKE_ARGS+=("-DUSE_OBSERVERS=ON")
+CMAKE_ARGS+=("-DUSE_ZSTD=ON")
 CMAKE_ARGS+=("-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX}")
 
-# Explicitly set Python executable.
-# On Ubuntu 16.04 the default Python is still 2.7.
-PYTHON="$(which python)"
-if [[ "${BUILD_ENVIRONMENT}" =~ py((2|3)\.?[0-9]?\.?[0-9]?) ]]; then
-  PYTHON=$(which "python${BASH_REMATCH[1]}")
-  CMAKE_ARGS+=("-DPYTHON_EXECUTABLE=${PYTHON}")
+if [[ $BUILD_ENVIRONMENT == *-aten-* ]]; then
+  if [[ CMAKE_ARGS != *USE_ATEN* ]] && [[ CMAKE_ARGS != *BUILD_ATEN* ]]; then
+    CMAKE_ARGS+=("-DBUILD_ATEN=ON")
+  fi
 fi
+if [[ $BUILD_ENVIRONMENT == *mkl* ]]; then
+  CMAKE_ARGS+=("-DBLAS=MKL")
+fi
+if [[ $BUILD_ENVIRONMENT == *cuda* ]]; then
+  CMAKE_ARGS+=("-DUSE_CUDA=ON")
+  CMAKE_ARGS+=("-DCUDA_ARCH_NAME=Maxwell")
+  CMAKE_ARGS+=("-DUSE_NNPACK=OFF")
+
+  # Explicitly set path to NVCC such that the symlink to ccache or sccache is used
+  CMAKE_ARGS+=("-DCUDA_NVCC_EXECUTABLE=${CACHE_WRAPPER_DIR}/nvcc")
 
-case "${BUILD_ENVIRONMENT}" in
-  *-mkl*)
-    CMAKE_ARGS+=("-DBLAS=MKL")
-    ;;
-  *-cuda*)
-    CMAKE_ARGS+=("-DUSE_CUDA=ON")
-    CMAKE_ARGS+=("-DCUDA_ARCH_NAME=Maxwell")
-    CMAKE_ARGS+=("-DUSE_NNPACK=OFF")
-
-    # Explicitly set path to NVCC such that the symlink to ccache or sccache is used
-    CMAKE_ARGS+=("-DCUDA_NVCC_EXECUTABLE=${CACHE_WRAPPER_DIR}/nvcc")
-
-    # Ensure FindCUDA.cmake can infer the right path to the CUDA toolkit.
-    # Setting PATH to resolve to the right nvcc alone isn't enough.
-    # See /usr/share/cmake-3.5/Modules/FindCUDA.cmake, block at line 589.
-    export CUDA_PATH="/usr/local/cuda"
-
-    # Ensure the ccache symlink can still find the real nvcc binary.
-    export PATH="/usr/local/cuda/bin:$PATH"
-    ;;
-  *-rocm*)
-    export LANG=C.UTF-8
-    export LC_ALL=C.UTF-8
-    export HCC_AMDGPU_TARGET=gfx900
-esac
+  # Ensure FindCUDA.cmake can infer the right path to the CUDA toolkit.
+  # Setting PATH to resolve to the right nvcc alone isn't enough.
+  # See /usr/share/cmake-3.5/Modules/FindCUDA.cmake, block at line 589.
+  export CUDA_PATH="/usr/local/cuda"
+
+  # Ensure the ccache symlink can still find the real nvcc binary.
+  export PATH="/usr/local/cuda/bin:$PATH"
+fi
+if [[ $BUILD_ENVIRONMENT == *rocm* ]]; then
+  export LANG=C.UTF-8
+  export LC_ALL=C.UTF-8
+  export HCC_AMDGPU_TARGET=gfx900
+fi
 
 # Try to include Redis support for Linux builds
 if [ "$(uname)" == "Linux" ]; then
@@ -154,14 +164,6 @@ if [ "$(uname)" == "Darwin" ]; then
   CMAKE_ARGS+=("-DBUILD_CUSTOM_PROTOBUF=ON")
 fi
 
-# We test the presence of cmake3 (for platforms like Centos and Ubuntu 14.04)
-# and use that if so.
-if [[ -x "$(command -v cmake3)" ]]; then
-  CMAKE_BINARY=cmake3
-else
-  CMAKE_BINARY=cmake
-fi
-
 # Use a speciallized onnx namespace in CI to catch hardcoded onnx namespace
 CMAKE_ARGS+=("-DONNX_NAMESPACE=ONNX_NAMESPACE_FOR_C2_CI")
 
@@ -173,17 +175,35 @@ if [[ -n "$INTEGRATED" ]]; then
   CMAKE_ARGS+=("-DCAFFE2_LINK_LOCAL_PROTOBUF=OFF")
 fi
 
-# Configure
-${CMAKE_BINARY} "${ROOT_DIR}" ${CMAKE_ARGS[*]} "$@"
-
-# Build
+# We test the presence of cmake3 (for platforms like Centos and Ubuntu 14.04)
+# and use that if so.
+if [[ -x "$(command -v cmake3)" ]]; then
+  CMAKE_BINARY=cmake3
+else
+  CMAKE_BINARY=cmake
+fi
 # sccache will fail for CUDA builds if all cores are used for compiling
 if [[ "${BUILD_ENVIRONMENT}" == *-cuda* ]] && [ -n "${SCCACHE}" ]; then
   MAX_JOBS=`expr $(nproc) - 1`
 else
   MAX_JOBS=$(nproc)
 fi
 
+
+###############################################################################
+# Configure and make
+###############################################################################
+# Run cmake from ./build_caffe2 directory so it doesn't conflict with
+# standard PyTorch build directory. Eventually these won't need to
+# be separate.
+rm -rf build_caffe2
+mkdir build_caffe2
+cd ./build_caffe2
+
+# Configure
+${CMAKE_BINARY} "${ROOT_DIR}" ${CMAKE_ARGS[*]} "$@"
+
+# Build
 if [ "$(uname)" == "Linux" ]; then
   make "-j${MAX_JOBS}" install
 else
@@ -193,6 +213,11 @@ fi
 
 report_compile_cache_stats
 
+
+###############################################################################
+# Install ONNX
+###############################################################################
+
 # Install ONNX into a local directory
 pip install --user -b /tmp/pip_install_onnx "file://${ROOT_DIR}/third_party/onnx#egg=onnx"
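The Python-detection hunk above relies on bash's =~ operator, which fills the BASH_REMATCH array with the regex capture groups. A minimal standalone sketch of that mechanism; the BUILD_ENVIRONMENT values here (py2-gcc5 and friends) are hypothetical, chosen only to exercise the regex:

  for BUILD_ENVIRONMENT in py2-gcc5 py3.6-clang7 py3-cuda9; do
    if [[ "${BUILD_ENVIRONMENT}" =~ py((2|3)\.?[0-9]?\.?[0-9]?) ]]; then
      # BASH_REMATCH[1] holds the first capture group, e.g. "2" or "3.6"
      echo "${BUILD_ENVIRONMENT} -> python${BASH_REMATCH[1]}"
    fi
  done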

.jenkins/pytorch/test.sh

Lines changed: 2 additions & 2 deletions
@@ -36,10 +36,10 @@ fi
 
 export ATEN_DISABLE_AVX=
 export ATEN_DISABLE_AVX2=
-if [[ "${JOB_BASE_NAME}" == *NO_AVX* ]]; then
+if [[ "${JOB_BASE_NAME}" == *-NO_AVX-* ]]; then
   export ATEN_DISABLE_AVX=1
 fi
-if [[ "${JOB_BASE_NAME}" == *NO_AVX2* ]]; then
+if [[ "${JOB_BASE_NAME}" == *-NO_AVX2-* ]]; then
   export ATEN_DISABLE_AVX2=1
 fi
 
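The added hyphens in the new globs are the whole fix: the old pattern *NO_AVX* also matches any name containing NO_AVX2, so AVX was wrongly disabled on AVX2-only jobs too. A small sketch with an assumed job name (illustrative, not taken from CI):

  JOB_BASE_NAME="pytorch-linux-xenial-py3-NO_AVX2-test"  # hypothetical job name
  [[ "${JOB_BASE_NAME}" == *NO_AVX* ]] && echo "old glob matches: would also set ATEN_DISABLE_AVX=1"
  [[ "${JOB_BASE_NAME}" == *-NO_AVX-* ]] || echo "new glob does not match: only AVX2 is disabled"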

.jenkins/pytorch/win-build.sh

Lines changed: 10 additions & 2 deletions
@@ -44,7 +44,15 @@ set MAGMA_HOME=%cd%\\magma
 
 :: Install sccache
 mkdir %CD%\\tmp_bin
-if "%REBUILD%"=="" ( aws s3 cp s3://ossci-windows/sccache.exe %CD%\\tmp_bin\\sccache.exe --quiet )
+if "%REBUILD%"=="" (
+  :check_sccache
+  %CD%\\tmp_bin\\sccache.exe --show-stats || (
+    taskkill /im sccache.exe /f /t || set ERRORLEVEL=0
+    del %CD%\\tmp_bin\\sccache.exe
+    aws s3 cp s3://ossci-windows/sccache.exe %CD%\\tmp_bin\\sccache.exe
+    goto :check_sccache
+  )
+)
 
 :: Install Miniconda3
 if "%REBUILD%"=="" (
@@ -73,7 +81,7 @@ set CUDNN_ROOT_DIR=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0
 :: Target only our CI GPU machine's CUDA arch to speed up the build
 set TORCH_CUDA_ARCH_LIST=5.2
 
-sccache --stop-server || set ERRORLEVEL=0
+sccache --stop-server
 sccache --start-server
 sccache --zero-stats
 set CC=sccache cl
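The new batch block keeps re-downloading sccache until the binary answers --show-stats, replacing the single --quiet copy. A rough bash analog of the same control flow, for readers less used to batch labels and goto (local paths are illustrative; the S3 URL is the one used above):

  until ./tmp_bin/sccache --show-stats; do
    pkill -f sccache || true          # counterpart of: taskkill /im sccache.exe /f /t
    rm -f ./tmp_bin/sccache
    aws s3 cp s3://ossci-windows/sccache.exe ./tmp_bin/sccache
  done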

CMakeLists.txt

Lines changed: 9 additions & 0 deletions
@@ -30,6 +30,15 @@ if(NOT DEFINED BLAS_SET_BY_USER)
   set(BLAS_SET_BY_USER ${BLAS_SET_BY_USER} CACHE STRING "Marks whether BLAS was manually set by user or auto-detected")
 endif()
 
+# These lines are an attempt to make find_package(cuda) pick up
+# libcuda.dylib, and not cuda.framework. It doesn't work all
+# the time, but it seems to help for some users.
+# TODO: replace this with a more robust fix
+if(APPLE)
+  set(CMAKE_FIND_FRAMEWORK LAST)
+  set(CMAKE_FIND_APPBUNDLE LAST)
+endif()
+
 # ---[ Options.
 # Note to developers: if you add an option below, make sure you also add it to
 # cmake/Summary.cmake so that the summary prints out the option values.
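For local debugging, the same search-order behavior can be forced at configure time rather than in CMakeLists.txt; a hypothetical macOS invocation (the build directory is an assumption):

  cmake -DCMAKE_FIND_FRAMEWORK=LAST -DCMAKE_FIND_APPBUNDLE=LAST ..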

CODEOWNERS

Lines changed: 5 additions & 5 deletions
@@ -11,9 +11,9 @@
 /requirements.txt @apaszke @soumith @colesbury @gchanan @zdevito @ezyang
 /torch/csrc/api/ @apaszke @soumith @colesbury @gchanan @zdevito @ezyang @ebetica @goldsborough
 /test/cpp/api/ @apaszke @soumith @colesbury @gchanan @zdevito @ezyang @ebetica @goldsborough
-/torch/onnx/ @anderspapitto @bddppq @dzhulgakov @ezyang @houseroad @jamesr66a @smessmer
-/torch/csrc/onnx/ @anderspapitto @bddppq @dzhulgakov @ezyang @houseroad @jamesr66a @smessmer
-/torch/csrc/jit/passes/onnx/ @anderspapitto @bddppq @dzhulgakov @ezyang @houseroad @jamesr66a @smessmer
-/test/onnx/ @anderspapitto @bddppq @dzhulgakov @ezyang @houseroad @jamesr66a @smessmer
-/scripts/onnx/ @anderspapitto @bddppq @dzhulgakov @ezyang @houseroad @jamesr66a @smessmer
+/torch/onnx/ @anderspapitto @bddppq @dzhulgakov @ezyang @houseroad @jamesr66a @smessmer @Yangqing
+/torch/csrc/onnx/ @anderspapitto @bddppq @dzhulgakov @ezyang @houseroad @jamesr66a @smessmer @Yangqing
+/torch/csrc/jit/passes/onnx/ @anderspapitto @bddppq @dzhulgakov @ezyang @houseroad @jamesr66a @smessmer @Yangqing
+/test/onnx/ @anderspapitto @bddppq @dzhulgakov @ezyang @houseroad @jamesr66a @smessmer @Yangqing
+/scripts/onnx/ @anderspapitto @bddppq @dzhulgakov @ezyang @houseroad @jamesr66a @smessmer @Yangqing
 /torch/lib/c10d/ @apaszke @pietern @teng-li

aten/src/ATen/Backtrace.h

Lines changed: 2 additions & 0 deletions
@@ -1,3 +1,5 @@
+#pragma once
+
 #include <cstddef>
 #include <string>
 #include <typeinfo>

aten/src/ATen/CPUApplyUtils.h

Lines changed: 27 additions & 28 deletions
@@ -354,28 +354,26 @@ inline void CPU_tensor_parallel_apply1(
     int64_t grain_size = internal::TBB_GRAIN_SIZE) {
   if (!_apply_preamble({tensor1}))
     return;
-  if (tensor1.numel() < grain_size) {
-    CPU_tensor_apply1<scalar1>(tensor1, op);
-    return;
-  }
-  auto range = tbb::blocked_range<size_t>(0, tensor1.numel());
   if (tensor1.ndimension() < 8) {
-    tbb::parallel_for(
-        range, [&tensor1, &op](const tbb::blocked_range<size_t> r) {
+    parallel_for(
+        0,
+        tensor1.numel(),
+        grain_size,
+        [&tensor1, &op](int64_t begin, int64_t end) {
          apply_op(
-              r.end() - r.begin(),
-              r.begin(),
+              end - begin,
+              begin,
              op,
              strided_tensor_iter_fixed<scalar1, 8>(tensor1, true));
        });
   } else {
-    tbb::parallel_for(
-        range, [&tensor1, &op](const tbb::blocked_range<size_t> r) {
+    parallel_for(
+        0,
+        tensor1.numel(),
+        grain_size,
+        [&tensor1, &op](int64_t begin, int64_t end) {
          apply_op(
-              r.end() - r.begin(),
-              r.begin(),
-              op,
-              strided_tensor_iter<scalar1>(tensor1));
+              end - begin, begin, op, strided_tensor_iter<scalar1>(tensor1));
        });
   }
 }
@@ -388,27 +386,28 @@ inline void CPU_tensor_parallel_apply2(
     int64_t grain_size = internal::TBB_GRAIN_SIZE) {
   if (!_apply_preamble({tensor1, tensor2}))
    return;
-  if ((tensor1.numel() + tensor2.numel()) < grain_size) {
-    CPU_tensor_apply2<scalar1, scalar2>(tensor1, tensor2, op);
-    return;
-  }
-  auto range = tbb::blocked_range<size_t>(0, tensor1.numel());
   if (tensor1.ndimension() < 8 && tensor2.ndimension() < 8) {
-    tbb::parallel_for(
-        range, [&tensor1, &tensor2, &op](const tbb::blocked_range<size_t> r) {
+    parallel_for(
+        0,
+        tensor1.numel(),
+        grain_size,
+        [&tensor1, &tensor2, &op](int64_t begin, int64_t end) {
          apply_op(
-              r.end() - r.begin(),
-              r.begin(),
+              end - begin,
+              begin,
              op,
              strided_tensor_iter_fixed<scalar1, 8>(tensor1),
              strided_tensor_iter_fixed<scalar2, 8>(tensor2));
        });
   } else {
-    tbb::parallel_for(
-        range, [&tensor1, &tensor2, &op](const tbb::blocked_range<size_t> r) {
+    parallel_for(
+        0,
+        tensor1.numel(),
+        grain_size,
+        [&tensor1, &tensor2, &op](int64_t begin, int64_t end) {
          apply_op(
-              r.end() - r.begin(),
-              r.begin(),
+              end - begin,
+              begin,
              op,
              strided_tensor_iter<scalar1>(tensor1),
              strided_tensor_iter<scalar2>(tensor2));

aten/src/ATen/Context.h

Lines changed: 3 additions & 0 deletions
@@ -86,6 +86,9 @@ class AT_API Context {
   cudaStream_t getCurrentCUDAStream() const {
     return detail::getCUDAHooks().getCurrentCUDAStream(thc_state.get());
   }
+  cudaStream_t getCurrentCUDAStreamOnDevice(int64_t device) const {
+    return detail::getCUDAHooks().getCurrentCUDAStreamOnDevice(thc_state.get(), device);
+  }
   cudaDeviceProp* getCurrentDeviceProperties() const {
     return detail::getCUDAHooks().getCurrentDeviceProperties(thc_state.get());
   }
