@@ -36,19 +36,17 @@ const cudnn_error_description = @compat(Dict(
3636import Base. show
3737show (io:: IO , error:: CuDNNError ) = print (io, cudnn_error_description[error. code])
3838
39- @windows ? (
40- begin
41- const libcudnn = Libdl. find_library ([" cudnn64_70.dll" , " cudnn64_65.dll" , " cudnn32_70.dll" ,
42- " cudnn32_65.dll" , " cudnn64_4.dll" ], [" " ])
43- @assert (libcudnn != " " ) " Could not find a CUDA neural net DLL [cudnn64_70.dll, cudnn64_65.dll, cudnn32_70.dll, cudnn32_65.dll, cudnn64_4.dll]. See: http://mochajl.readthedocs.io/en/latest/user-guide/backend.html#cuda-backend"
44- end
45- : # linux or mac
46- begin
39+ if is_windows ()
40+ const libcudnn = Libdl. find_library ([" cudnn64_80.dll" , " cudnn64_70.dll" , " cudnn32_80.dll" ,
41+ " cudnn32_70.dll" ], [" " ])
42+ @assert (libcudnn != " " ) " Could not find a CUDA neural net DLL [cudnn64_80.dll, cudnn64_70.dll, cudnn32_80.dll, cudnn32_70.dll]. See: http://mochajl.readthedocs.io/en/latest/user-guide/backend.html#cuda-backend"
43+ else
4744 const libcudnn = Libdl. find_library ([" libcudnn" ], [" " ])
4845 @assert (libcudnn != " " ) " Could not find CUDA neural shared library [libcudnn]. See http://mochajl.readthedocs.io/en/latest/user-guide/backend.html#cuda-backend"
49- end )
46+ end
5047
5148macro cudnncall (fv, argtypes, args... )
49+ args = map (esc, args)
5250 f = eval (fv)
5351 quote
5452 _curet = ccall ( ($ (Meta. quot (f)), $ libcudnn), Cint, $ argtypes, $ (args... ) )
@@ -58,8 +56,8 @@ macro cudnncall(fv, argtypes, args...)
5856 end
5957end
6058
61- typealias Handle Ptr{Void}
62- typealias StreamHandle Ptr{Void}
59+ const Handle = Ptr{Void}
60+ const StreamHandle = Ptr{Void}
6361
6462function create ()
6563 handle = Handle[0 ]
@@ -79,10 +77,10 @@ function get_stream(handle::Handle)
7977end
8078
8179# Data structures to represent Image/Filter and the Neural Network Layer
82- typealias Tensor4dDescriptor Ptr{Void}
83- typealias ConvolutionDescriptor Ptr{Void}
84- typealias PoolingDescriptor Ptr{Void}
85- typealias FilterDescriptor Ptr{Void}
80+ const Tensor4dDescriptor = Ptr{Void}
81+ const ConvolutionDescriptor = Ptr{Void}
82+ const PoolingDescriptor = Ptr{Void}
83+ const FilterDescriptor = Ptr{Void}
8684
8785const CUDNN_DATA_FLOAT = 0
8886const CUDNN_DATA_DOUBLE = 1
108106const CUDNN_TENSOR_NCHW = 0 # row major (wStride = 1, hStride = w)
109107const CUDNN_TENSOR_NHWC = 1 # feature maps interleaved ( cStride = 1 )
110108
109+ # cudnnNanPropagation_t
110+ const CUDNN_NOT_PROPAGATE_NAN = 0
111+ const CUDNN_PROPAGATE_NAN = 1
112+
111113function create_tensor4d_descriptor ()
112114 desc = Tensor4dDescriptor[0 ]
113115 @cudnncall (:cudnnCreateTensorDescriptor , (Tensor4dDescriptor,), desc)
@@ -190,15 +192,19 @@ const CUDNN_CONVOLUTION_FWD = 0 # Tensor Convolution function
190192const CUDNN_CONVOLUTION_WEIGHT_GRAD = 1 # Weight Gradient update function
191193const CUDNN_CONVOLUTION_DATA_GRAD = 2 # Data Gradient update function
192194
195+ # cudnn tensor format
196+ const CUDNN_TENSOR_NCHW = 0 # row major (wStride = 1, hStride = w)
197+ const CUDNN_TENSOR_NHWC = 1 # feature maps interleaved ( cStride = 1 )
198+
193199function create_filter_descriptor ()
194200 desc = FilterDescriptor[0 ]
195201 @cudnncall (:cudnnCreateFilterDescriptor , (Ptr{FilterDescriptor},), desc)
196202 return desc[1 ]
197203end
198204function set_filter_descriptor {T<:AbstractFloat} (desc:: FilterDescriptor , dtype:: Type{T} , dims :: NTuple{4, Int} )
199205 w,h,c,k = dims
200- @cudnncall (:cudnnSetFilter4dDescriptor , (FilterDescriptor, Cint, Cint, Cint, Cint, Cint),
201- desc, cudnn_data_type (dtype), k, c, h, w)
206+ @cudnncall (:cudnnSetFilter4dDescriptor , (FilterDescriptor, Cint, Cint, Cint, Cint, Cint, Cint ),
207+ desc, cudnn_data_type (dtype), CUDNN_TENSOR_NCHW, k, c, h, w)
202208 end
203209function create_filter_descriptor (dtype:: Type , dims :: NTuple{4, Int} )
204210 desc = create_filter_descriptor ()
@@ -207,9 +213,10 @@ function create_filter_descriptor(dtype::Type, dims :: NTuple{4, Int})
207213end
208214function get_filter_descriptor (desc:: FilterDescriptor )
209215 k = Cint[0 ]; c = Cint[0 ]; h = Cint[0 ]; w = Cint[0 ]
210- dtype = Cint[0 ]
211- @cudnncall (:cudnnGetFilterDescriptor , (FilterDescriptor,Ptr{Cint},Ptr{Cint},Ptr{Cint},Ptr{Cint},Ptr{Cint}),
212- desc, dtype, k, c, h, w)
216+ dtype = Cint[0 ]; tensor_format = Cint[0 ]
217+ @cudnncall (:cudnnGetFilterDescriptor , (FilterDescriptor,Ptr{Cint},Ptr{Cint},Ptr{Cint},Ptr{Cint},Ptr{Cint},Ptr{Cint}),
218+ desc, dtype, tensor_format, k, c, h, w)
219+ @assert tensor_format[1 ] == CUDNN_TENSOR_NCHW
213220 return (cudnn_data_type (dtype[1 ]), w[1 ], h[1 ], c[1 ], k[1 ])
214221end
215222function destroy_filter_descriptor (desc:: FilterDescriptor )
@@ -262,13 +269,14 @@ function destroy_convolution_descriptor(desc::ConvolutionDescriptor)
262269 @cudnncall (:cudnnDestroyConvolutionDescriptor , (ConvolutionDescriptor,), desc)
263270end
264271
265- function get_output_tensor4d_dim (desc:: ConvolutionDescriptor , path:: Int )
266- @assert CUDNN_CONVOLUTION_FWD <= path <= CUDNN_CONVOLUTION_DATA_GRAD
267- n = Cint[0 ]; c = Cint[0 ]; h = Cint[0 ]; w = Cint[0 ]
268- @cudnncall (:cudnnGetOutputTensor4dDim , (ConvolutionDescriptor, Cint, Ptr{Void}, Ptr{Void}, Ptr{Void}, Ptr{Void}),
269- desc, path, n, c, h, w)
270- return (w[1 ], h[1 ], c[1 ], n[1 ])
271- end
272+ # TODO : remove this, and path definition above
273+ # -- function get_output_tensor4d_dim(desc::ConvolutionDescriptor, path::Int)
274+ # -- @assert CUDNN_CONVOLUTION_FWD <= path <= CUDNN_CONVOLUTION_DATA_GRAD
275+ # -- n = Cint[0]; c = Cint[0]; h = Cint[0]; w = Cint[0]
276+ # -- @cudnncall(:cudnnGetOutputTensor4dDim, (ConvolutionDescriptor, Cint, Ptr{Void}, Ptr{Void}, Ptr{Void}, Ptr{Void}),
277+ # -- desc, path, n, c, h, w)
278+ # -- return (w[1], h[1], c[1], n[1])
279+ # -- end
272280
273281const CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM = 0
274282const CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMPT_GEMM = 1
@@ -339,24 +347,50 @@ function convolution_backward_filter{T<:AbstractFloat}(handle::Handle, alpha::T,
339347 beta:: T , grad_desc:: FilterDescriptor , grad:: CuPtr )
340348 alpha_ptr = T[alpha]
341349 beta_ptr = T[beta]
342- @cudnncall (:cudnnConvolutionBackwardFilter_v2 , (Handle, Ptr{Void}, Tensor4dDescriptor, Ptr{Void},
343- Tensor4dDescriptor, Ptr{Void},
344- ConvolutionDescriptor,
345- Ptr{Void}, FilterDescriptor, Ptr{Void}),
346- handle, alpha_ptr, src_desc, src. p, diff_desc, diff. p, conv, beta_ptr, grad_desc, grad. p)
350+
351+ # XXX : properly query algorithm and allocate workspace
352+ bwd_filter_algor = 0
353+ workspace = C_NULL
354+ workspace_size = 0
355+
356+ @cudnncall (:cudnnConvolutionBackwardFilter ,
357+ (Handle,
358+ Ptr{Void}, # const void *alpha
359+ Tensor4dDescriptor, # const cudnnTensorDescriptor_t xDesc
360+ Ptr{Void}, # const void *x
361+ Tensor4dDescriptor, # const cudnnTensorDescroptor_t dyDesc
362+ Ptr{Void}, # const void *dy
363+ ConvolutionDescriptor, # const cudnnConvolutionDescriptor_t
364+ Cint, # cudnnConvolutionBwdFilterAlgo_t
365+ Ptr{Void}, # void *workSpace
366+ Csize_t, # size_t workSpaceSizeInBytes
367+ Ptr{Void}, # const void *beta
368+ FilterDescriptor, # const cudnnFilterDescriptor_t dwDesc
369+ Ptr{Void}), # void *dw
370+ handle, alpha_ptr, src_desc, src. p, diff_desc, diff. p, conv,
371+ bwd_filter_algor, workspace, workspace_size,
372+ beta_ptr, grad_desc, grad. p)
347373end
348374
349375function convolution_backward_data {T<:AbstractFloat} (handle:: Handle , alpha:: T , filter_desc:: FilterDescriptor , filter:: CuPtr ,
350376 diff_desc:: Tensor4dDescriptor , diff:: CuPtr , conv:: ConvolutionDescriptor ,
351377 beta:: T , grad_desc:: Tensor4dDescriptor , grad:: CuPtr )
352378 alpha_ptr = T[alpha]
353379 beta_ptr = T[beta]
354- @cudnncall (:cudnnConvolutionBackwardData_v2 , (Handle, Ptr{Void}, FilterDescriptor, Ptr{Void},
380+
381+ # XXX : properly query algorithm and allocate workspace
382+ bwd_data_algor = 0
383+ workspace = C_NULL
384+ workspace_size = 0
385+
386+ @cudnncall (:cudnnConvolutionBackwardData , (Handle, Ptr{Void}, FilterDescriptor, Ptr{Void},
355387 Tensor4dDescriptor, Ptr{Void},
356388 ConvolutionDescriptor,
389+ Cint, Ptr{Void}, Csize_t,
357390 Ptr{Void},Tensor4dDescriptor,
358391 Ptr{Void}),
359392 handle, alpha_ptr, filter_desc, filter. p, diff_desc, diff. p, conv,
393+ bwd_data_algor, workspace, workspace_size,
360394 beta_ptr, grad_desc, grad. p)
361395end
362396
@@ -404,8 +438,8 @@ function set_pooling_descriptor(desc::PoolingDescriptor, mode::Int, dims::NTuple
404438 w,h = dims
405439 pad_w, pad_h = padding
406440 stride_w, stride_h = stride
407- @cudnncall (:cudnnSetPooling2dDescriptor , (PoolingDescriptor, Cint, Cint,Cint, Cint,Cint, Cint,Cint),
408- desc, mode, h,w, pad_w, pad_h, stride_h, stride_w)
441+ @cudnncall (:cudnnSetPooling2dDescriptor , (PoolingDescriptor, Cint, Cint, Cint, Cint, Cint,Cint, Cint,Cint),
442+ desc, mode, CUDNN_NOT_PROPAGATE_NAN, h,w, pad_w, pad_h, stride_h, stride_w)
409443end
410444
411445function create_pooling_descriptor (mode:: Int , dims:: NTuple{2,Int} , stride:: NTuple{2,Int} , padding:: NTuple{2,Int} )
@@ -414,14 +448,16 @@ function create_pooling_descriptor(mode::Int, dims::NTuple{2,Int}, stride::NTupl
414448 return desc
415449end
416450
417- function get_pooling_descriptor (desc:: PoolingDescriptor )
418- mode = Cint[0 ]
419- h = Cint[0 ]; w = Cint[0 ]; stride_h = Cint[0 ]; stride_w = Cint[0 ]
420- @cudnncall (:cudnGetPoolingDescriptor , (PoolingDescriptor, Ptr{Cint}, Ptr{Cint}, Ptr{Cint},
421- Ptr{Cint}, Ptr{Cint}),
422- desc, mode, h, w, stride_h, stride_w)
423- return (mode[1 ], (w,h), (stride_w, stride_h))
424- end
451+ # TODO : remove
452+ # -- function get_pooling_descriptor(desc::PoolingDescriptor)
453+ # -- mode = Cint[0]; nan_prop_opt = Cint[0]
454+ # -- h = Cint[0]; w = Cint[0]; stride_h = Cint[0]; stride_w = Cint[0]
455+ # -- @cudnncall(:cudnGetPoolingDescriptor, (PoolingDescriptor, Ptr{Cint}, Ptr{Cint}, Ptr{Cint}, Ptr{Cint},
456+ # -- Ptr{Cint}, Ptr{Cint}),
457+ # -- desc, mode, nan_prop_opt, h, w, stride_h, stride_w)
458+ # --
459+ # -- return (mode[1], (w,h), (stride_w, stride_h))
460+ # -- end
425461function destroy_pooling_descriotpr (desc:: PoolingDescriptor )
426462 @cudnncall (:cudnnDestroyPoolingDescriptor , (PoolingDescriptor,), desc)
427463end
0 commit comments