Commit 74d46cd

[mgpu][integr] Vulkan/GL Integration prototype

Parent: 35cc2bb

46 files changed (+4584, -8 lines)

CMakeLists.txt

Lines changed: 13 additions & 0 deletions
@@ -187,6 +187,9 @@ option(USE_SNPE "Use Qualcomm's SNPE library" OFF)
 option(USE_SYSTEM_EIGEN_INSTALL
   "Use system Eigen instead of the one under third_party" OFF)
 option(USE_TENSORRT "Using Nvidia TensorRT library" OFF)
+option(USE_VULKAN "Use Vulkan GPU backend" ON)
+option(USE_VULKANGL "Use VulkanGL GPU backend" OFF)
+option(USE_VULKAN_SHADERC_RUNTIME "Use Vulkan Shader compilation runtime(Needs shaderc lib)" OFF)
 option(USE_XNNPACK "Use XNNPACK" ON)
 option(USE_ZMQ "Use ZMQ" OFF)
 option(USE_ZSTD "Use ZSTD" OFF)
@@ -436,6 +439,16 @@ if(USE_XNNPACK)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_XNNPACK -DUSE_INTERNAL_THREADPOOL_IMPL")
 endif()
 
+if(USE_VULKAN)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_VULKAN")
+endif()
+if(USE_VULKANGL)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_VULKANGL")
+endif()
+if(USE_VULKAN_SHADERC_RUNTIME)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_VULKAN_SHADERC_RUNTIME")
+endif()
+
 # ---[ Whitelist file if whitelist is specified
 include(cmake/Whitelist.cmake)
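Each of the three new options, when ON, appends a matching preprocessor define to CMAKE_CXX_FLAGS (configuring with -DUSE_VULKAN=ON yields a -DUSE_VULKAN compile flag), so C++ sources can branch on backend availability at compile time. A minimal sketch of how such guards are typically consumed; the shaderc-versus-precompiled split shown here is an assumption, not code from this commit:

#include <cstdio>

int main() {
#ifdef USE_VULKAN
  std::printf("Vulkan backend compiled in\n");
#ifdef USE_VULKAN_SHADERC_RUNTIME
  // Assumed pattern: compile GLSL to SPIR-V at runtime via the shaderc library.
  std::printf("runtime shader compilation available\n");
#else
  // Assumed pattern: ship precompiled SPIR-V binaries instead.
#endif
#endif
#ifdef USE_VULKANGL
  std::printf("VulkanGL (GLES) backend compiled in\n");
#endif
  return 0;
}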

aten/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -33,6 +33,7 @@ set(ATen_HIP_INCLUDE)
 set(ATen_CPU_DEPENDENCY_LIBS)
 set(ATen_CUDA_DEPENDENCY_LIBS)
 set(ATen_HIP_DEPENDENCY_LIBS)
+set(ATen_VULKANGL_DEPENDENCY_LIBS)
 set(ATen_PUBLIC_CUDA_DEPENDENCY_LIBS)
 set(ATen_PUBLIC_HIP_DEPENDENCY_LIBS)
 set(ATEN_INSTALL_BIN_SUBDIR "bin" CACHE PATH "ATen install binary subdirectory")
@@ -120,4 +121,5 @@ set(ATen_THIRD_PARTY_INCLUDE ${ATen_THIRD_PARTY_INCLUDE} PARENT_SCOPE)
 set(ATen_CPU_DEPENDENCY_LIBS ${ATen_CPU_DEPENDENCY_LIBS} PARENT_SCOPE)
 set(ATen_CUDA_DEPENDENCY_LIBS ${ATen_CUDA_DEPENDENCY_LIBS} PARENT_SCOPE)
 set(ATen_HIP_DEPENDENCY_LIBS ${ATen_HIP_DEPENDENCY_LIBS} PARENT_SCOPE)
+set(ATen_VULKANGL_DEPENDENCY_LIBS ${ATen_VULKANGL_DEPENDENCY_LIBS} PARENT_SCOPE)
 set(ATen_CORE_TEST_SRCS ${ATen_CORE_TEST_SRCS} PARENT_SCOPE)

aten/src/ATen/CMakeLists.txt

Lines changed: 8 additions & 2 deletions
@@ -59,6 +59,7 @@ file(GLOB mkldnn_cpp "mkldnn/*.cpp")
 file(GLOB native_cpp "native/*.cpp")
 file(GLOB native_mkl_cpp "native/mkl/*.cpp")
 file(GLOB native_mkldnn_cpp "native/mkldnn/*.cpp")
+file(GLOB native_vulkan_cpp "native/vulkan/*.cpp")
 file(GLOB native_sparse_cpp "native/sparse/*.cpp")
 file(GLOB native_quantized_cpp
   "native/quantized/*.cpp"
@@ -90,7 +91,7 @@ file(GLOB native_quantized_hip_cpp "native/quantized/hip/*.cpp")
 file(GLOB native_xnnpack "native/xnnpack/*.cpp")
 
 add_subdirectory(quantized)
-set(all_cpu_cpp ${base_cpp} ${ATen_CORE_SRCS} ${native_cpp} ${native_sparse_cpp} ${native_quantized_cpp} ${native_mkl_cpp} ${native_mkldnn_cpp} ${native_xnnpack} ${generated_cpp} ${core_generated_cpp} ${ATen_CPU_SRCS} ${ATen_QUANTIZED_SRCS} ${cpu_kernel_cpp})
+set(all_cpu_cpp ${base_cpp} ${ATen_CORE_SRCS} ${native_cpp} ${native_sparse_cpp} ${native_quantized_cpp} ${native_mkl_cpp} ${native_mkldnn_cpp} ${native_xnnpack} ${native_vulkan_cpp} ${generated_cpp} ${core_generated_cpp} ${ATen_CPU_SRCS} ${ATen_QUANTIZED_SRCS} ${cpu_kernel_cpp})
 if(AT_MKL_ENABLED)
   set(all_cpu_cpp ${all_cpu_cpp} ${mkl_cpp})
 endif()
@@ -163,7 +164,11 @@ if(LAPACK_FOUND)
   endif()
 endif(LAPACK_FOUND)
 
-if(UNIX AND NOT APPLE)
+IF (USE_VULKANGL)
+  list(APPEND ATen_VULKANGL_DEPENDENCY_LIBS EGL GLESv3)
+ENDIF()
+
+IF (UNIX AND NOT APPLE)
   include(CheckLibraryExists)
   # https://github.com/libgit2/libgit2/issues/2128#issuecomment-35649830
   CHECK_LIBRARY_EXISTS(rt clock_gettime "time.h" NEED_LIBRT)
@@ -392,3 +397,4 @@ set(ATen_HIP_INCLUDE ${ATen_HIP_INCLUDE} PARENT_SCOPE)
 set(ATen_CPU_DEPENDENCY_LIBS ${ATen_CPU_DEPENDENCY_LIBS} PARENT_SCOPE)
 set(ATen_CUDA_DEPENDENCY_LIBS ${ATen_CUDA_DEPENDENCY_LIBS} PARENT_SCOPE)
 set(ATen_HIP_DEPENDENCY_LIBS ${ATen_HIP_DEPENDENCY_LIBS} PARENT_SCOPE)
+set(ATen_VULKANGL_DEPENDENCY_LIBS ${ATen_VULKANGL_DEPENDENCY_LIBS} PARENT_SCOPE)
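The VulkanGL path links ATen against EGL and GLESv3. For orientation only, here is a minimal headless-context sketch of the kind of setup a GLES compute backend typically performs; init_headless_gles is a hypothetical helper, not part of this commit, and the surfaceless make-current assumes EGL 1.5 headers and EGL_KHR_surfaceless_context support:

#include <EGL/egl.h>

// Hypothetical helper (not from this commit): create a windowless
// GLES 3 context suitable for compute-style workloads.
bool init_headless_gles() {
  EGLDisplay dpy = eglGetDisplay(EGL_DEFAULT_DISPLAY);
  if (dpy == EGL_NO_DISPLAY || eglInitialize(dpy, nullptr, nullptr) != EGL_TRUE)
    return false;

  const EGLint cfg_attrs[] = {
      EGL_RENDERABLE_TYPE, EGL_OPENGL_ES3_BIT,
      EGL_SURFACE_TYPE, EGL_PBUFFER_BIT,
      EGL_NONE};
  EGLConfig cfg;
  EGLint num_cfgs = 0;
  if (eglChooseConfig(dpy, cfg_attrs, &cfg, 1, &num_cfgs) != EGL_TRUE || num_cfgs == 0)
    return false;

  const EGLint ctx_attrs[] = {EGL_CONTEXT_CLIENT_VERSION, 3, EGL_NONE};
  EGLContext ctx = eglCreateContext(dpy, cfg, EGL_NO_CONTEXT, ctx_attrs);
  if (ctx == EGL_NO_CONTEXT)
    return false;

  // Bind the context without a window surface; GLES work then runs
  // against framebuffers/SSBOs created by the backend.
  return eglMakeCurrent(dpy, EGL_NO_SURFACE, EGL_NO_SURFACE, ctx) == EGL_TRUE;
}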
aten/src/ATen/detail/VULKANGuardImpl.cpp

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
+#include <ATen/detail/VULKANGuardImpl.h>
+
+namespace at {
+namespace detail {
+
+C10_REGISTER_GUARD_IMPL(VULKAN, VULKANGuardImpl);
+
+}
+} // namespace at
aten/src/ATen/detail/VULKANGuardImpl.h

Lines changed: 64 additions & 0 deletions
@@ -0,0 +1,64 @@
+#pragma once
+
+#include <c10/core/impl/DeviceGuardImplInterface.h>
+#include <c10/macros/Macros.h>
+
+namespace at {
+namespace detail {
+
+struct VULKANGuardImpl final : public c10::impl::DeviceGuardImplInterface {
+  VULKANGuardImpl() {}
+
+  explicit VULKANGuardImpl(DeviceType t) {
+    TORCH_INTERNAL_ASSERT(t == DeviceType::VULKAN);
+  }
+
+  DeviceType type() const override {
+    return DeviceType::VULKAN;
+  }
+  Device exchangeDevice(Device) const override {
+    // no-op
+    return Device(DeviceType::VULKAN, -1);
+  }
+  Device getDevice() const override {
+    return Device(DeviceType::VULKAN, -1);
+  }
+  void setDevice(Device) const override {
+    // no-op
+  }
+  void uncheckedSetDevice(Device d) const noexcept override {
+    // no-op
+  }
+  Stream getStream(Device d) const noexcept override {
+    // no-op
+    return Stream(Stream::DEFAULT, Device(DeviceType::VULKAN, -1));
+  }
+  // NB: These do NOT set the current device
+  Stream exchangeStream(Stream s) const noexcept override {
+    // no-op
+    return Stream(Stream::DEFAULT, Device(DeviceType::VULKAN, -1));
+  }
+  DeviceIndex deviceCount() const noexcept override {
+    return 1;
+  }
+
+  // Event-related functions
+  void record(
+      void** event,
+      const Stream& stream,
+      const DeviceIndex device_index,
+      const EventFlag flag) const override {
+    TORCH_CHECK(false, "VULKAN backend doesn't support events.");
+  }
+  void block(void* event, const Stream& stream) const override {
+    TORCH_CHECK(false, "VULKAN backend doesn't support events.")
+  }
+  bool queryEvent(void* event) const override {
+    TORCH_CHECK(false, "VULKAN backend doesn't support events.")
+  }
+  void destroyEvent(void* event, const DeviceIndex device_index) const
+      noexcept override {}
+};
+
+} // namespace detail
+} // namespace at
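Once C10_REGISTER_GUARD_IMPL has run, generic device-guard machinery can fetch this implementation through the C10 registry. A minimal sketch, assuming DeviceType::VULKAN is introduced elsewhere among this commit's 46 files:

#include <c10/core/impl/DeviceGuardImplInterface.h>

void vulkan_guard_demo() {
  const c10::impl::DeviceGuardImplInterface* impl =
      c10::impl::getDeviceGuardImpl(c10::DeviceType::VULKAN);
  // The prototype reports a single device and a default stream; the
  // event APIs above deliberately TORCH_CHECK-fail if ever reached.
  c10::DeviceIndex count = impl->deviceCount(); // == 1
  c10::Stream stream =
      impl->getStream(c10::Device(c10::DeviceType::VULKAN, -1));
  (void)count;
  (void)stream;
}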

aten/src/ATen/function_wrapper.py

Lines changed: 1 addition & 1 deletion
@@ -242,7 +242,7 @@ def TypedDict(name, attrs, total=True):  # type: ignore
     ('BFloat16', 'BFloat16', 'BFloat16AccrealNotDefined', True),
 ]
 
-static_dispatch_backends = ['CPU', 'QuantizedCPU']
+static_dispatch_backends = ['CPU', 'QuantizedCPU', 'Vulkan']
 
 
 class NYIError(Exception):

aten/src/ATen/gen.py

Lines changed: 4 additions & 1 deletion
@@ -171,6 +171,8 @@ def check_all_files_written(self):
 def backend_to_devicetype(backend):
     if backend == 'QuantizedCPU':
         return 'CPU'
+    if backend == 'Vulkan':
+        return 'VULKAN'
     return backend
 
 backends = ['CPU', 'CUDA']
@@ -362,7 +364,7 @@ def generate_storage_type_and_tensor(backend, density, declarations, per_op_regi
         fm.write(env['Type'] + ".cpp", SPARSE_TYPE_DERIVED_CPP, env)
         fm.write(env['Type'] + ".h", TYPE_DERIVED_H, env)
 
-    if env['DeviceType'] == 'CPU':
+    if env['DeviceType'] == 'CPU' or env['DeviceType'] == 'VULKAN':
         top_env['cpu_type_headers'].append(
             '#include "ATen/{}.h"'.format(env['Type']))
     else:
@@ -381,6 +383,7 @@ def iterate_types():
             yield (backend, density)
     for backend in quantized_backends:
         yield (backend, 'Dense')
+    yield('Vulkan', 'Dense')
 
 
 def gen_per_op_registration_filename(opname):

aten/src/ATen/native/Convolution.cpp

Lines changed: 14 additions & 0 deletions
@@ -42,6 +42,7 @@ struct ConvParams {
   bool use_miopen(const at::Tensor& input, bool bias_defined) const;
   bool use_mkldnn(const at::Tensor& input) const;
   bool use_nnpack(const at::Tensor& input) const;
+  bool use_vulkan(const at::Tensor& input) const;
   bool is_depthwise(const at::Tensor& input, const at::Tensor& weight) const;
 };
 
@@ -239,6 +240,15 @@ auto ConvParams::use_nnpack(const at::Tensor& input) const -> bool {
   return false;
 }
 
+auto ConvParams::use_vulkan(const at::Tensor& input) const -> bool {
+  return input.is_vulkan() &&
+         input.scalar_type() == kFloat &&
+         groups == 1 &&
+         !is_dilated() &&
+         !transposed &&
+         input.ndimension() == 4;
+}
+
 // We currently only have depthwise support for the case where groups ==
 // nInputPlane and nInputPlane == nOutputPlane (the latter due to the lack of
 // a depthwise multiplier)
@@ -695,6 +705,10 @@ at::Tensor _convolution(
         params.padding, params.stride, params.dilation, params.groups);
   }
 #endif
+  } else if (params.use_vulkan(input)) {
+    output = at::vulkan_convolution(
+        input, weight, bias,
+        params.padding, params.stride, params.dilation, params.groups);
   } else if (input.device().type() == c10::DeviceType::CPU || input.device().type() == c10::DeviceType::CUDA) {
     if (params.use_cpu_depthwise3x3_winograd(input, weight)) {
       output = convolution_depthwise3x3_winograd_stub(
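Taken together, use_vulkan() only fires for a 4-D kFloat tensor that already lives on the Vulkan backend, with groups == 1 and no dilation or transposition. A usage sketch under those constraints; this is not code from the commit, and it assumes weight and bias may be passed as CPU tensors with any upload handled inside vulkan_convolution:

#include <ATen/ATen.h>

at::Tensor vulkan_conv_roundtrip(
    const at::Tensor& input_cpu, // NCHW, kFloat
    const at::Tensor& weight,
    const at::Tensor& bias) {
  at::Tensor input_v = input_cpu.to_vulkan(); // CPU -> Vulkan (dense_to_vulkan)
  at::Tensor output_v = at::vulkan_convolution(
      input_v, weight, bias,
      /*padding=*/{0, 0}, /*stride=*/{1, 1},
      /*dilation=*/{1, 1}, /*groups=*/1);
  return output_v.to_dense(); // Vulkan -> CPU (vulkan_to_dense)
}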

aten/src/ATen/native/native_functions.yaml

Lines changed: 14 additions & 0 deletions
@@ -306,6 +306,7 @@
     SparseCPU: add_sparse
     SparseCUDA: add_sparse
     MkldnnCPU: mkldnn_add
+    Vulkan: vulkan_add
   supports_named_tensor: True
 
 - func: add_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
@@ -1158,6 +1159,7 @@
     MkldnnCPU: empty_mkldnn
     SparseCPU: empty_sparse
     SparseCUDA: empty_sparse
+    Vulkan: empty_vulkan
 
 - func: new_empty(Tensor self, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   use_c10_dispatcher: unboxed_only
@@ -1931,6 +1933,8 @@
 
 - func: mkldnn_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
 
+- func: vulkan_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups) -> Tensor
+
 - func: miopen_batch_norm(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon) -> (Tensor, Tensor, Tensor)
   dispatch:
     CUDA: miopen_batch_norm
@@ -3536,6 +3540,7 @@
     SparseCPU: sparse_to_dense
     SparseCUDA: sparse_to_dense
     MkldnnCPU: mkldnn_to_dense
+    Vulkan: vulkan_to_dense
   requires_tensor: True
 
 - func: to_dense_backward(Tensor grad, Tensor input) -> Tensor
@@ -3703,6 +3708,12 @@
   dispatch:
     CPU: dense_to_mkldnn
 
+- func: to_vulkan(Tensor self) -> Tensor
+  use_c10_dispatcher: full
+  variants: method
+  dispatch:
+    CPU: dense_to_vulkan
+
 - func: mkldnn_reorder_conv2d_weight(Tensor self, int[2] padding=0, int[2] stride=1, int[2] dilation=1, int groups=1) -> Tensor
   variants: function
   python_module: nn
@@ -6443,10 +6454,13 @@
 
 - func: upsample_nearest2d(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor
   python_module: nn
+  use_c10_dispatcher: unboxed_only
+  variants: function
   dispatch:
     CPU: upsample_nearest2d_cpu
     CUDA: upsample_nearest2d_cuda
     QuantizedCPU: quantized_upsample_nearest2d_cpu
+    Vulkan: upsample_nearest2d_vulkan
 
 - func: upsample_nearest2d_backward.grad_input(Tensor grad_output, int[2] output_size, int[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
   python_module: nn
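A short sketch exercising the ops that gained Vulkan dispatch entries above (add, upsample_nearest2d) plus the to_vulkan()/to_dense() transfers; illustrative only, not code from this commit:

#include <ATen/ATen.h>

void vulkan_ops_demo(const at::Tensor& a_cpu, const at::Tensor& b_cpu) {
  at::Tensor a = a_cpu.to_vulkan();  // CPU: dense_to_vulkan
  at::Tensor b = b_cpu.to_vulkan();
  at::Tensor sum = at::add(a, b);    // Vulkan: vulkan_add
  at::Tensor up =
      at::upsample_nearest2d(sum, {64, 64}); // Vulkan: upsample_nearest2d_vulkan
  at::Tensor back = up.to_dense();   // Vulkan: vulkan_to_dense
  (void)back;
}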
