Skip to content

Commit fdf35a6

Browse files
committed
fix cudnn compatibility issues
1 parent 05ea259 commit fdf35a6

File tree

5 files changed

+108
-77
lines changed

5 files changed

+108
-77
lines changed

src/cuda/backend.jl

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,24 @@
11
export GPUBackend
22

33
macro defkernels(kernels...)
4+
kernel_names = map(string, kernels)
45
field_defs = map(kernels) do ker
56
:($ker :: CUDA.CuFunction)
67
end
78
type_body = Expr(:block, field_defs...)
89

9-
field_inits = map(kernels) do ker
10-
:(kernels.$ker = CUDA.CuFunction(mod, $(string(ker))))
10+
field_inits = map(kernels, kernel_names) do ker, ker_name
11+
:(kernels.$ker = CUDA.CuFunction(mod, $(ker_name)))
1112
end
1213
field_init_block = Expr(:block, field_inits...)
1314

14-
quote
15-
type $(esc(:MochaKernels))
15+
esc(quote
16+
type MochaKernels
1617
mod :: CUDA.CuModule
1718

1819
$type_body
1920

20-
$(esc(:MochaKernels))() = begin
21+
MochaKernels() = begin
2122
mod_dir = joinpath(dirname(@__FILE__), "kernels")
2223
mod_path = joinpath(mod_dir, "kernels.ptx")
2324

@@ -42,7 +43,7 @@ macro defkernels(kernels...)
4243
return kernels
4344
end
4445
end
45-
end
46+
end)
4647
end
4748

4849
@defkernels(

src/cuda/cublas.jl

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,19 +34,17 @@ const cublas_error_description = @compat(Dict(
3434
import Base.show
3535
show(io::IO, error::CuBLASError) = print(io, cublas_error_description[error.code])
3636

37-
@windows? (
38-
begin
37+
if is_windows()
3938
const libcublas = Libdl.find_library(["cublas64_70.dll", "cublas64_65.dll",
4039
"cublas32_70.dll", "cublas32_65.dll", "cublas64_75.dll"], [""])
4140
@assert (libcublas != "") "Could not find cuBLAS DLL [cublas64_70.dll, cublas64_65.dll, cublas32_70.dll, cublas32_65.dll, cublas64_75.dll]. See: http://mochajl.readthedocs.io/en/latest/user-guide/backend.html#cuda-backend"
42-
end
43-
: # linux or mac
44-
begin
41+
else
4542
const libcublas = Libdl.find_library(["libcublas"], [""])
4643
@assert (libcublas != "") "Could not find cuBLAS DLL [libcublas]. See http://mochajl.readthedocs.io/en/latest/user-guide/backend.html#cuda-backend"
47-
end)
44+
end
4845

4946
macro cublascall(fv, argtypes, args...)
47+
args = map(esc, args)
5048
f = eval(fv)
5149
quote
5250
_curet = ccall( ($(Meta.quot(f)), $libcublas), Cint, $argtypes, $(args...) )
@@ -56,8 +54,8 @@ macro cublascall(fv, argtypes, args...)
5654
end
5755
end
5856

59-
typealias Handle Ptr{Void}
60-
typealias StreamHandle Ptr{Void}
57+
const Handle = Ptr{Void}
58+
const StreamHandle = Ptr{Void}
6159

6260
function create()
6361
handle = Handle[0]

src/cuda/cuda.jl

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,14 @@ module CUDA
44
export CuPtr
55
using Compat
66

7-
@windows? (
8-
begin
7+
if is_windows()
98
const libcuda = Libdl.find_library(["nvcuda.dll"], [""])
10-
end
11-
: # linux or mac
12-
begin
9+
else
1310
const libcuda = Libdl.find_library(["libcuda","libcudart"], [""])
1411
if isempty(libcuda)
1512
error("Libcuda not found via Libdl.find_library! Please check installation and ENV configuration")
1613
end
17-
end)
14+
end
1815

1916
const driver_error_descriptions = @compat(Dict(
2017
0 => "Success",
@@ -145,7 +142,7 @@ end
145142
############################################################
146143
# Memory allocation
147144
############################################################
148-
typealias CUdeviceptr Ptr{Void}
145+
const CUdeviceptr = Ptr{Void}
149146

150147
type CuPtr
151148
p::CUdeviceptr
@@ -210,10 +207,10 @@ end
210207
immutable CuFunction
211208
handle::Ptr{Void}
212209

213-
function CuFunction(md::CuModule, name::ASCIIString)
210+
function CuFunction(md::CuModule, name::String)
214211
a = Array{Ptr{Void}}(1)
215212
@cucall(:cuModuleGetFunction, (Ptr{Ptr{Void}}, Ptr{Void}, Ptr{Cchar}),
216-
a, md.handle, name)
213+
a, md.handle, name)
217214
new(a[1])
218215
end
219216
end
@@ -239,8 +236,7 @@ get_dim_z(g::Int) = 1
239236
get_dim_z(g::@compat(Tuple{Int, Int})) = 1
240237
get_dim_z(g::@compat(Tuple{Int, Int, Int})) = g[3]
241238

242-
using Compat
243-
@compat typealias CuDim Union{Int, Tuple{Int, Int}, Tuple{Int, Int, Int}}
239+
const CuDim = Union{Int, Tuple{Int, Int}, Tuple{Int, Int, Int}}
244240

245241
# Stream management
246242

src/cuda/cudnn.jl

Lines changed: 79 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -36,19 +36,17 @@ const cudnn_error_description = @compat(Dict(
3636
import Base.show
3737
show(io::IO, error::CuDNNError) = print(io, cudnn_error_description[error.code])
3838

39-
@windows? (
40-
begin
41-
const libcudnn = Libdl.find_library(["cudnn64_70.dll", "cudnn64_65.dll", "cudnn32_70.dll",
42-
"cudnn32_65.dll", "cudnn64_4.dll"], [""])
43-
@assert (libcudnn != "") "Could not find a CUDA neural net DLL [cudnn64_70.dll, cudnn64_65.dll, cudnn32_70.dll, cudnn32_65.dll, cudnn64_4.dll]. See: http://mochajl.readthedocs.io/en/latest/user-guide/backend.html#cuda-backend"
44-
end
45-
: # linux or mac
46-
begin
39+
if is_windows()
40+
const libcudnn = Libdl.find_library(["cudnn64_80.dll", "cudnn64_70.dll", "cudnn32_80.dll",
41+
"cudnn32_70.dll"], [""])
42+
@assert (libcudnn != "") "Could not find a CUDA neural net DLL [cudnn64_80.dll, cudnn64_70.dll, cudnn32_80.dll, cudnn32_70.dll]. See: http://mochajl.readthedocs.io/en/latest/user-guide/backend.html#cuda-backend"
43+
else
4744
const libcudnn = Libdl.find_library(["libcudnn"], [""])
4845
@assert (libcudnn != "") "Could not find CUDA neural shared library [libcudnn]. See http://mochajl.readthedocs.io/en/latest/user-guide/backend.html#cuda-backend"
49-
end)
46+
end
5047

5148
macro cudnncall(fv, argtypes, args...)
49+
args = map(esc, args)
5250
f = eval(fv)
5351
quote
5452
_curet = ccall( ($(Meta.quot(f)), $libcudnn), Cint, $argtypes, $(args...) )
@@ -58,8 +56,8 @@ macro cudnncall(fv, argtypes, args...)
5856
end
5957
end
6058

61-
typealias Handle Ptr{Void}
62-
typealias StreamHandle Ptr{Void}
59+
const Handle = Ptr{Void}
60+
const StreamHandle = Ptr{Void}
6361

6462
function create()
6563
handle = Handle[0]
@@ -79,10 +77,10 @@ function get_stream(handle::Handle)
7977
end
8078

8179
# Data structures to represent Image/Filter and the Neural Network Layer
82-
typealias Tensor4dDescriptor Ptr{Void}
83-
typealias ConvolutionDescriptor Ptr{Void}
84-
typealias PoolingDescriptor Ptr{Void}
85-
typealias FilterDescriptor Ptr{Void}
80+
const Tensor4dDescriptor = Ptr{Void}
81+
const ConvolutionDescriptor = Ptr{Void}
82+
const PoolingDescriptor = Ptr{Void}
83+
const FilterDescriptor = Ptr{Void}
8684

8785
const CUDNN_DATA_FLOAT = 0
8886
const CUDNN_DATA_DOUBLE = 1
@@ -108,6 +106,10 @@ end
108106
const CUDNN_TENSOR_NCHW = 0 # row major (wStride = 1, hStride = w)
109107
const CUDNN_TENSOR_NHWC = 1 # feature maps interleaved ( cStride = 1 )
110108

109+
# cudnnNanPropagation_t
110+
const CUDNN_NOT_PROPAGATE_NAN = 0
111+
const CUDNN_PROPAGATE_NAN = 1
112+
111113
function create_tensor4d_descriptor()
112114
desc = Tensor4dDescriptor[0]
113115
@cudnncall(:cudnnCreateTensorDescriptor, (Tensor4dDescriptor,), desc)
@@ -190,15 +192,19 @@ const CUDNN_CONVOLUTION_FWD = 0 # Tensor Convolution function
190192
const CUDNN_CONVOLUTION_WEIGHT_GRAD = 1 # Weight Gradient update function
191193
const CUDNN_CONVOLUTION_DATA_GRAD = 2 # Data Gradient update function
192194

195+
# cudnn tensor format
196+
const CUDNN_TENSOR_NCHW = 0 # row major (wStride = 1, hStride = w)
197+
const CUDNN_TENSOR_NHWC = 1 # feature maps interleaved ( cStride = 1 )
198+
193199
function create_filter_descriptor()
194200
desc = FilterDescriptor[0]
195201
@cudnncall(:cudnnCreateFilterDescriptor, (Ptr{FilterDescriptor},), desc)
196202
return desc[1]
197203
end
198204
function set_filter_descriptor{T<:AbstractFloat}(desc::FilterDescriptor, dtype::Type{T}, dims :: NTuple{4, Int})
199205
w,h,c,k = dims
200-
@cudnncall(:cudnnSetFilter4dDescriptor, (FilterDescriptor, Cint, Cint, Cint, Cint, Cint),
201-
desc, cudnn_data_type(dtype), k, c, h, w)
206+
@cudnncall(:cudnnSetFilter4dDescriptor, (FilterDescriptor, Cint, Cint, Cint, Cint, Cint, Cint),
207+
desc, cudnn_data_type(dtype), CUDNN_TENSOR_NCHW, k, c, h, w)
202208
end
203209
function create_filter_descriptor(dtype::Type, dims :: NTuple{4, Int})
204210
desc = create_filter_descriptor()
@@ -207,9 +213,10 @@ function create_filter_descriptor(dtype::Type, dims :: NTuple{4, Int})
207213
end
208214
function get_filter_descriptor(desc::FilterDescriptor)
209215
k = Cint[0]; c = Cint[0]; h = Cint[0]; w = Cint[0]
210-
dtype = Cint[0]
211-
@cudnncall(:cudnnGetFilterDescriptor, (FilterDescriptor,Ptr{Cint},Ptr{Cint},Ptr{Cint},Ptr{Cint},Ptr{Cint}),
212-
desc, dtype, k, c, h, w)
216+
dtype = Cint[0]; tensor_format = Cint[0]
217+
@cudnncall(:cudnnGetFilterDescriptor, (FilterDescriptor,Ptr{Cint},Ptr{Cint},Ptr{Cint},Ptr{Cint},Ptr{Cint},Ptr{Cint}),
218+
desc, dtype, tensor_format, k, c, h, w)
219+
@assert tensor_format[1] == CUDNN_TENSOR_NCHW
213220
return (cudnn_data_type(dtype[1]), w[1], h[1], c[1], k[1])
214221
end
215222
function destroy_filter_descriptor(desc::FilterDescriptor)
@@ -262,13 +269,14 @@ function destroy_convolution_descriptor(desc::ConvolutionDescriptor)
262269
@cudnncall(:cudnnDestroyConvolutionDescriptor, (ConvolutionDescriptor,), desc)
263270
end
264271

265-
function get_output_tensor4d_dim(desc::ConvolutionDescriptor, path::Int)
266-
@assert CUDNN_CONVOLUTION_FWD <= path <= CUDNN_CONVOLUTION_DATA_GRAD
267-
n = Cint[0]; c = Cint[0]; h = Cint[0]; w = Cint[0]
268-
@cudnncall(:cudnnGetOutputTensor4dDim, (ConvolutionDescriptor, Cint, Ptr{Void}, Ptr{Void}, Ptr{Void}, Ptr{Void}),
269-
desc, path, n, c, h, w)
270-
return (w[1], h[1], c[1], n[1])
271-
end
272+
# TODO: remove this, and path definition above
273+
#-- function get_output_tensor4d_dim(desc::ConvolutionDescriptor, path::Int)
274+
#-- @assert CUDNN_CONVOLUTION_FWD <= path <= CUDNN_CONVOLUTION_DATA_GRAD
275+
#-- n = Cint[0]; c = Cint[0]; h = Cint[0]; w = Cint[0]
276+
#-- @cudnncall(:cudnnGetOutputTensor4dDim, (ConvolutionDescriptor, Cint, Ptr{Void}, Ptr{Void}, Ptr{Void}, Ptr{Void}),
277+
#-- desc, path, n, c, h, w)
278+
#-- return (w[1], h[1], c[1], n[1])
279+
#-- end
272280

273281
const CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM = 0
274282
const CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMPT_GEMM = 1
@@ -339,24 +347,50 @@ function convolution_backward_filter{T<:AbstractFloat}(handle::Handle, alpha::T,
339347
beta::T, grad_desc::FilterDescriptor, grad::CuPtr)
340348
alpha_ptr = T[alpha]
341349
beta_ptr = T[beta]
342-
@cudnncall(:cudnnConvolutionBackwardFilter_v2, (Handle, Ptr{Void}, Tensor4dDescriptor, Ptr{Void},
343-
Tensor4dDescriptor, Ptr{Void},
344-
ConvolutionDescriptor,
345-
Ptr{Void}, FilterDescriptor, Ptr{Void}),
346-
handle, alpha_ptr, src_desc, src.p, diff_desc, diff.p, conv, beta_ptr, grad_desc, grad.p)
350+
351+
# XXX: properly query algorithm and allocate workspace
352+
bwd_filter_algor = 0
353+
workspace = C_NULL
354+
workspace_size = 0
355+
356+
@cudnncall(:cudnnConvolutionBackwardFilter,
357+
(Handle,
358+
Ptr{Void}, # const void *alpha
359+
Tensor4dDescriptor, # const cudnnTensorDescriptor_t xDesc
360+
Ptr{Void}, # const void *x
361+
Tensor4dDescriptor, # const cudnnTensorDescroptor_t dyDesc
362+
Ptr{Void}, # const void *dy
363+
ConvolutionDescriptor, # const cudnnConvolutionDescriptor_t
364+
Cint, # cudnnConvolutionBwdFilterAlgo_t
365+
Ptr{Void}, # void *workSpace
366+
Csize_t, # size_t workSpaceSizeInBytes
367+
Ptr{Void}, # const void *beta
368+
FilterDescriptor, # const cudnnFilterDescriptor_t dwDesc
369+
Ptr{Void}), # void *dw
370+
handle, alpha_ptr, src_desc, src.p, diff_desc, diff.p, conv,
371+
bwd_filter_algor, workspace, workspace_size,
372+
beta_ptr, grad_desc, grad.p)
347373
end
348374

349375
function convolution_backward_data{T<:AbstractFloat}(handle::Handle, alpha::T, filter_desc::FilterDescriptor, filter::CuPtr,
350376
diff_desc::Tensor4dDescriptor, diff::CuPtr, conv::ConvolutionDescriptor,
351377
beta::T, grad_desc::Tensor4dDescriptor, grad::CuPtr)
352378
alpha_ptr = T[alpha]
353379
beta_ptr = T[beta]
354-
@cudnncall(:cudnnConvolutionBackwardData_v2, (Handle, Ptr{Void}, FilterDescriptor, Ptr{Void},
380+
381+
# XXX: properly query algorithm and allocate workspace
382+
bwd_data_algor = 0
383+
workspace = C_NULL
384+
workspace_size = 0
385+
386+
@cudnncall(:cudnnConvolutionBackwardData, (Handle, Ptr{Void}, FilterDescriptor, Ptr{Void},
355387
Tensor4dDescriptor, Ptr{Void},
356388
ConvolutionDescriptor,
389+
Cint, Ptr{Void}, Csize_t,
357390
Ptr{Void},Tensor4dDescriptor,
358391
Ptr{Void}),
359392
handle, alpha_ptr, filter_desc, filter.p, diff_desc, diff.p, conv,
393+
bwd_data_algor, workspace, workspace_size,
360394
beta_ptr, grad_desc, grad.p)
361395
end
362396

@@ -404,8 +438,8 @@ function set_pooling_descriptor(desc::PoolingDescriptor, mode::Int, dims::NTuple
404438
w,h = dims
405439
pad_w, pad_h = padding
406440
stride_w, stride_h = stride
407-
@cudnncall(:cudnnSetPooling2dDescriptor, (PoolingDescriptor, Cint, Cint,Cint, Cint,Cint, Cint,Cint),
408-
desc, mode, h,w, pad_w, pad_h, stride_h, stride_w)
441+
@cudnncall(:cudnnSetPooling2dDescriptor, (PoolingDescriptor, Cint, Cint, Cint,Cint, Cint,Cint, Cint,Cint),
442+
desc, mode, CUDNN_NOT_PROPAGATE_NAN, h,w, pad_w, pad_h, stride_h, stride_w)
409443
end
410444

411445
function create_pooling_descriptor(mode::Int, dims::NTuple{2,Int}, stride::NTuple{2,Int}, padding::NTuple{2,Int})
@@ -414,14 +448,16 @@ function create_pooling_descriptor(mode::Int, dims::NTuple{2,Int}, stride::NTupl
414448
return desc
415449
end
416450

417-
function get_pooling_descriptor(desc::PoolingDescriptor)
418-
mode = Cint[0]
419-
h = Cint[0]; w = Cint[0]; stride_h = Cint[0]; stride_w = Cint[0]
420-
@cudnncall(:cudnGetPoolingDescriptor, (PoolingDescriptor, Ptr{Cint}, Ptr{Cint}, Ptr{Cint},
421-
Ptr{Cint}, Ptr{Cint}),
422-
desc, mode, h, w, stride_h, stride_w)
423-
return (mode[1], (w,h), (stride_w, stride_h))
424-
end
451+
#TODO: remove
452+
#-- function get_pooling_descriptor(desc::PoolingDescriptor)
453+
#-- mode = Cint[0]; nan_prop_opt = Cint[0]
454+
#-- h = Cint[0]; w = Cint[0]; stride_h = Cint[0]; stride_w = Cint[0]
455+
#-- @cudnncall(:cudnGetPoolingDescriptor, (PoolingDescriptor, Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cint},
456+
#-- Ptr{Cint}, Ptr{Cint}),
457+
#-- desc, mode, nan_prop_opt, h, w, stride_h, stride_w)
458+
#--
459+
#-- return (mode[1], (w,h), (stride_w, stride_h))
460+
#-- end
425461
function destroy_pooling_descriotpr(desc::PoolingDescriptor)
426462
@cudnncall(:cudnnDestroyPoolingDescriptor, (PoolingDescriptor,), desc)
427463
end

0 commit comments

Comments
 (0)