7 changes: 1 addition & 6 deletions aten/src/ATen/CMakeLists.txt
@@ -397,7 +397,7 @@ INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/ATenConfig.cmake"
DESTINATION "${AT_INSTALL_SHARE_DIR}/cmake/ATen")

# https://stackoverflow.com/questions/11096471/how-can-i-install-a-hierarchy-of-files-using-cmake
-FOREACH(HEADER ${base_h})
+FOREACH(HEADER ${base_h} ${cuda_h} ${cudnn_h})
string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/" "" HEADER_SUB ${HEADER})
GET_FILENAME_COMPONENT(DIR ${HEADER_SUB} DIRECTORY)
INSTALL(FILES ${HEADER} DESTINATION ${AT_INSTALL_INCLUDE_DIR}/ATen/${DIR})
@@ -406,11 +406,6 @@ FOREACH(HEADER ${generated_h} ${cuda_generated_h})
# NB: Assumed to be flat
INSTALL(FILES ${HEADER} DESTINATION ${AT_INSTALL_INCLUDE_DIR}/ATen)
ENDFOREACH()
-FOREACH(HEADER ${cuda_h})
-string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/" "" HEADER_SUB ${HEADER})
-GET_FILENAME_COMPONENT(DIR ${HEADER_SUB} DIRECTORY)
-INSTALL(FILES ${HEADER} DESTINATION ${AT_INSTALL_INCLUDE_DIR}/ATen/${DIR})
-ENDFOREACH()
INSTALL(FILES ${CMAKE_BINARY_DIR}/aten/src/ATen/Declarations.yaml
DESTINATION ${AT_INSTALL_SHARE_DIR}/ATen)

13 changes: 7 additions & 6 deletions aten/src/ATen/cudnn/Descriptors.h
@@ -5,6 +5,7 @@
#include "cudnn-wrapper.h"
#include <ATen/ATen.h>
#include <ATen/TensorUtils.h>
#include "ATen/cuda/ATenCUDAGeneral.h"
#include <cuda.h>

#if CUDNN_VERSION < 7000
@@ -100,7 +101,7 @@ struct DescriptorDeleter {
// initialized the first time you call set() or any other initializing
// function.
template <typename T, cudnnStatus_t (*ctor)(T**), cudnnStatus_t (*dtor)(T*)>
-class Descriptor
+class AT_CUDA_API Descriptor
{
public:
// TODO: Figure out why const-correctness doesn't work here
@@ -129,7 +130,7 @@ class Descriptor
std::unique_ptr<T, DescriptorDeleter<T, dtor>> desc_;
};

-class TensorDescriptor
+class AT_CUDA_API TensorDescriptor
: public Descriptor<cudnnTensorStruct,
&cudnnCreateTensorDescriptor,
&cudnnDestroyTensorDescriptor>
@@ -181,7 +182,7 @@ class FilterDescriptor
}
};

-struct ConvolutionDescriptor
+struct AT_CUDA_API ConvolutionDescriptor
: public Descriptor<cudnnConvolutionStruct,
&cudnnCreateConvolutionDescriptor,
&cudnnDestroyConvolutionDescriptor>
@@ -200,7 +201,7 @@ struct ConvolutionDescriptor
}
};

-struct SpatialTransformerDescriptor
+struct AT_CUDA_API SpatialTransformerDescriptor
: public Descriptor<cudnnSpatialTransformerStruct,
&cudnnCreateSpatialTransformerDescriptor,
&cudnnDestroySpatialTransformerDescriptor>
@@ -239,7 +240,7 @@ inline cudnnStatus_t cudnnRestoreDropoutDescriptor(

#endif // CUDNN_VERSION

-struct DropoutDescriptor
+struct AT_CUDA_API DropoutDescriptor
: public Descriptor<cudnnDropoutStruct,
&cudnnCreateDropoutDescriptor,
&cudnnDestroyDropoutDescriptor>
@@ -281,7 +282,7 @@ struct DropoutDescriptor
}
};

-struct RNNDescriptor
+struct AT_CUDA_API RNNDescriptor
: public Descriptor<cudnnRNNStruct,
&cudnnCreateRNNDescriptor,
&cudnnDestroyRNNDescriptor>
80 changes: 80 additions & 0 deletions test/cpp_extensions/cudnn_extension.cpp
@@ -0,0 +1,80 @@
/*
 * CuDNN ReLU extension. A simple function, but it illustrates the general
 * structure of most CuDNN extensions:
 * 1) Check arguments. The at::check* functions provide a standard way to
 *    validate input and emit pretty errors.
* 2) Create descriptors. Most CuDNN functions require creating and setting a
* variety of descriptors.
* 3) Apply the CuDNN function.
* 4) Destroy your descriptors.
* 5) Return something (optional).
*/

#include <torch/torch.h>

#include <ATen/cudnn/Descriptors.h> // for TensorDescriptor
#include <ATen/cudnn/Exceptions.h> // for CUDNN_CHECK
#include <ATen/cudnn/Handles.h> // for getCudnnHandle

// Name of the function in the Python module, also used in error messages by
// the at::check* functions.
const char* cudnn_relu_name = "cudnn_relu";

// Check arguments to cudnn_relu
void cudnn_relu_check(const at::Tensor& inputs, const at::Tensor& outputs) {
// Create TensorArgs. These record the name and position of each tensor
// argument.
at::TensorArg arg_inputs(inputs, "inputs", 0);
at::TensorArg arg_outputs(outputs, "outputs", 1);
// Check arguments. No need to return anything. These functions will throw an
// error if they fail. Messages are populated using information from
// TensorArgs.
at::checkContiguous(cudnn_relu_name, arg_inputs);
at::checkScalarType(cudnn_relu_name, arg_inputs, at::kFloat);
at::checkBackend(cudnn_relu_name, arg_inputs.tensor, at::kCUDA);
at::checkContiguous(cudnn_relu_name, arg_outputs);
at::checkScalarType(cudnn_relu_name, arg_outputs, at::kFloat);
at::checkBackend(cudnn_relu_name, arg_outputs.tensor, at::kCUDA);
at::checkSameSize(cudnn_relu_name, arg_inputs, arg_outputs);
}

void cudnn_relu(const at::Tensor& inputs, const at::Tensor& outputs) {
// Most CuDNN extensions will follow a similar pattern.
// Step 1: Check inputs. This will throw an error if inputs are invalid, so no
// need to check return codes here.
cudnn_relu_check(inputs, outputs);
// Step 2: Create descriptors
cudnnHandle_t cuDnn = at::native::getCudnnHandle();
// Note: 4 is the minimum dim for a TensorDescriptor. Input and output are the
// same size, type, and contiguous, so one descriptor is sufficient.
at::native::TensorDescriptor input_tensor_desc(inputs, 4);
cudnnActivationDescriptor_t activationDesc;
// Note: Always check the return value of cuDNN functions with CUDNN_CHECK
at::native::CUDNN_CHECK(cudnnCreateActivationDescriptor(&activationDesc));
at::native::CUDNN_CHECK(cudnnSetActivationDescriptor(
activationDesc,
/*mode=*/CUDNN_ACTIVATION_RELU,
/*reluNanOpt=*/CUDNN_PROPAGATE_NAN,
/*coef=*/1.));
// Step 3: Apply CuDNN function
float alpha = 1.;
float beta = 0.;
at::native::CUDNN_CHECK(cudnnActivationForward(
cuDnn,
activationDesc,
&alpha,
input_tensor_desc.desc(),
inputs.data_ptr(),
&beta,
input_tensor_desc.desc(), // output descriptor same as input
outputs.data_ptr()));
// Step 4: Destroy descriptors
at::native::CUDNN_CHECK(cudnnDestroyActivationDescriptor(activationDesc));
// Step 5: Return something (optional)
}

// Create the pybind11 module
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
// Use the same name as the check functions so error messages make sense
m.def(cudnn_relu_name, &cudnn_relu, "CuDNN ReLU");
}
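For orientation, a minimal sketch of how an extension like this can be built and called through the JIT machinery this PR extends. The module name and source path are illustrative, and -lcudnn is assumed because the extension calls cudnn* functions directly; the test below does essentially the same thing, including the Windows cudnn.lib variant:

import torch
import torch.utils.cpp_extension

# Build and load the extension. Linking cuDNN explicitly is assumed to be
# needed since the extension itself calls cudnn* functions.
ext = torch.utils.cpp_extension.load(
    name='cudnn_relu_demo',  # hypothetical module name
    sources=['cpp_extensions/cudnn_extension.cpp'],  # illustrative path
    extra_ldflags=['-lcudnn'],
    with_cuda=True,
    verbose=True)

x = torch.randn(16, device='cuda')
y = torch.zeros(16, device='cuda')
ext.cudnn_relu(x, y)  # writes relu(x) into y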
27 changes: 27 additions & 0 deletions test/test_cpp_extensions.py
100644 → 100755
@@ -1,7 +1,9 @@
import unittest
import sys

import torch
import torch.utils.cpp_extension
import torch.backends.cudnn
try:
import torch_test_cpp_extension.cpp as cpp_extension
except ImportError:
@@ -13,6 +15,7 @@

from torch.utils.cpp_extension import CUDA_HOME
TEST_CUDA = torch.cuda.is_available() and CUDA_HOME is not None
TEST_CUDNN = TEST_CUDA and torch.backends.cudnn.is_available()


class TestCppExtension(common.TestCase):
@@ -100,6 +103,30 @@ def test_jit_cuda_extension(self):
# 2 * sigmoid(0) = 2 * 0.5 = 1
self.assertEqual(z, torch.ones_like(z))

@unittest.skipIf(not TEST_CUDNN, "CuDNN not found")
def test_jit_cudnn_extension(self):
# Compile and load the CuDNN ReLU extension implemented above.
if sys.platform == 'win32':
extra_ldflags = ['cudnn.lib']
else:
extra_ldflags = ['-lcudnn']
module = torch.utils.cpp_extension.load(
name='torch_test_cudnn_extension',
sources=[
'cpp_extensions/cudnn_extension.cpp'
],
extra_ldflags=extra_ldflags,
verbose=True,
with_cuda=True)

x = torch.randn(100, device='cuda', dtype=torch.float32)
y = torch.zeros(100, device='cuda', dtype=torch.float32)
module.cudnn_relu(x, y) # y=relu(x)
self.assertEqual(torch.nn.functional.relu(x), y)
with self.assertRaisesRegex(RuntimeError, "same size"):
y_incorrect = torch.zeros(20, device='cuda', dtype=torch.float32)
module.cudnn_relu(x, y_incorrect)

def test_optional(self):
has_value = cpp_extension.function_taking_optional(torch.ones(5))
self.assertTrue(has_value)
32 changes: 23 additions & 9 deletions torch/utils/cpp_extension.py
@@ -416,7 +416,8 @@ def load(name,
extra_ldflags=None,
extra_include_paths=None,
build_directory=None,
-verbose=False):
+verbose=False,
+with_cuda=None):
'''
Loads a PyTorch C++ extension just-in-time (JIT).

@@ -464,6 +465,11 @@ def load(name,
to the build.
build_directory: optional path to use as build workspace.
verbose: If ``True``, turns on verbose logging of load steps.
with_cuda: Determines whether CUDA headers and libraries are added to
the build. If set to ``None`` (default), this value is
automatically determined based on whether ``sources`` contains any
``.cu`` or ``.cuh`` files. Set it to ``True`` to force CUDA headers
and libraries to be included.

Returns:
The loaded PyTorch extension as a Python module.
@@ -484,7 +490,8 @@ def load(name,
extra_ldflags,
extra_include_paths,
build_directory or _get_build_directory(name, verbose),
-verbose)
+verbose,
+with_cuda=with_cuda)
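A hedged usage sketch of the new flag on ``load``: forcing CUDA support when ``sources`` contains no ``.cu``/``.cuh`` files, so the auto-detection alone would pick ``False``. The file and module names here are illustrative:

import torch.utils.cpp_extension

# 'host_side.cpp' (hypothetical) calls CUDA APIs from plain C++, so
# extension-based auto-detection would wrongly conclude with_cuda=False.
module = torch.utils.cpp_extension.load(
    name='forced_cuda_demo',
    sources=['host_side.cpp'],
    with_cuda=True)  # force CUDA headers and libraries into the build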


def load_inline(name,
@@ -496,7 +503,8 @@ def load_inline(name,
extra_ldflags=None,
extra_include_paths=None,
build_directory=None,
-verbose=False):
+verbose=False,
+with_cuda=None):
'''
Loads a PyTorch C++ extension just-in-time (JIT) from string sources.

@@ -538,6 +546,11 @@ def load_inline(name,
functions: A list of function names for which to generate function
bindings. If a dictionary is given, it should map function names to
docstrings (which are otherwise just the function names).
with_cuda: Determines whether CUDA headers and libraries are added to
the build. If set to ``None`` (default), this value is
automatically determined based on whether ``cuda_sources`` is
provided. Set it to ``True`` to force CUDA headers and
libraries to be included.

Example:
>>> from torch.utils.cpp_extension import load_inline
@@ -552,8 +565,6 @@
'''
build_directory = build_directory or _get_build_directory(name, verbose)

-source_files = []
-
if isinstance(cpp_sources, str):
cpp_sources = [cpp_sources]
cuda_sources = cuda_sources or []
@@ -606,7 +617,8 @@ def load_inline(name,
extra_ldflags,
extra_include_paths,
build_directory,
-verbose)
+verbose,
+with_cuda=with_cuda)


def _jit_compile(name,
Expand All @@ -616,13 +628,15 @@ def _jit_compile(name,
extra_ldflags,
extra_include_paths,
build_directory,
-verbose):
+verbose,
+with_cuda=None):
baton = FileBaton(os.path.join(build_directory, 'lock'))
if baton.try_acquire():
try:
verify_ninja_availability()
check_compiler_abi_compatibility(os.environ.get('CXX', 'c++'))
-with_cuda = any(map(_is_cuda_file, sources))
+if with_cuda is None:
+    with_cuda = any(map(_is_cuda_file, sources))
extra_ldflags = _prepare_ldflags(
extra_ldflags or [],
with_cuda,
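The auto-detection above leans on ``_is_cuda_file``; a sketch of the presumed check, inferred from the ``.cu``/``.cuh`` rule in the docstrings rather than copied from the source:

import os

def _is_cuda_file(path):
    # Presumed behavior: treat .cu and .cuh files as CUDA sources.
    return os.path.splitext(path)[1] in ['.cu', '.cuh']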
@@ -837,7 +851,7 @@ def _write_ninja_file(path,
for source_file in sources:
# '/path/to/file.cpp' -> 'file'
file_name = os.path.splitext(os.path.basename(source_file))[0]
-if _is_cuda_file(source_file):
+if _is_cuda_file(source_file) and with_cuda:
rule = 'cuda_compile'
# Use a different object filename in case a C++ and CUDA file have
# the same filename but different extension (.cpp vs. .cu).