Skip to content

Commit d192117

Browse files
author
Marina Kolpakova
committed
GPU resize with INTER_AREA
1 parent bc54195 commit d192117

File tree

8 files changed

+273
-28
lines changed

8 files changed

+273
-28
lines changed

modules/gpu/src/cuda/resize.cu

Lines changed: 62 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
#include "opencv2/gpu/device/vec_math.hpp"
4747
#include "opencv2/gpu/device/saturate_cast.hpp"
4848
#include "opencv2/gpu/device/filters.hpp"
49+
# include <cfloat>
4950

5051
namespace cv { namespace gpu { namespace device
5152
{
@@ -65,6 +66,17 @@ namespace cv { namespace gpu { namespace device
6566
}
6667
}
6768

69+
template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, float fx, float fy, DevMem2D_<T> dst)
70+
{
71+
const int x = blockDim.x * blockIdx.x + threadIdx.x;
72+
const int y = blockDim.y * blockIdx.y + threadIdx.y;
73+
74+
if (x < dst.cols && y < dst.rows)
75+
{
76+
dst(y, x) = saturate_cast<T>(src(y, x));
77+
}
78+
}
79+
6880
template <template <typename> class Filter, typename T> struct ResizeDispatcherStream
6981
{
7082
static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
@@ -74,13 +86,43 @@ namespace cv { namespace gpu { namespace device
7486

7587
BrdReplicate<T> brd(src.rows, src.cols);
7688
BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
77-
Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc);
89+
Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc, fx, fy);
7890

7991
resize<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
8092
cudaSafeCall( cudaGetLastError() );
8193
}
8294
};
8395

96+
template <typename T> struct ResizeDispatcherStream<AreaFilter, T>
97+
{
98+
static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
99+
{
100+
dim3 block(32, 8);
101+
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
102+
103+
BrdConstant<T> brd(src.rows, src.cols);
104+
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
105+
AreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
106+
resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
107+
cudaSafeCall( cudaGetLastError() );
108+
}
109+
};
110+
111+
template <typename T> struct ResizeDispatcherStream<IntegerAreaFilter, T>
112+
{
113+
static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
114+
{
115+
dim3 block(32, 8);
116+
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
117+
118+
BrdConstant<T> brd(src.rows, src.cols);
119+
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
120+
IntegerAreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
121+
resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
122+
cudaSafeCall( cudaGetLastError() );
123+
}
124+
};
125+
84126
template <template <typename> class Filter, typename T> struct ResizeDispatcherNonStream
85127
{
86128
static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst)
@@ -169,14 +211,31 @@ namespace cv { namespace gpu { namespace device
169211
}
170212
};
171213

214+
template <typename T> struct ResizeDispatcher<AreaFilter, T>
215+
{
216+
static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
217+
{
218+
int iscale_x = round(fx);
219+
int iscale_y = round(fy);
220+
221+
if( std::abs(fx - iscale_x) < FLT_MIN && std::abs(fy - iscale_y) < FLT_MIN)
222+
ResizeDispatcherStream<IntegerAreaFilter, T>::call(src, fx, fy, dst, stream);
223+
else
224+
ResizeDispatcherStream<AreaFilter, T>::call(src, fx, fy, dst, stream);
225+
}
226+
};
227+
172228
template <typename T> void resize_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy,
173229
DevMem2Db dst, int interpolation, cudaStream_t stream)
174230
{
175231
typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream);
176232

177-
static const caller_t callers[3] =
233+
static const caller_t callers[4] =
178234
{
179-
ResizeDispatcher<PointFilter, T>::call, ResizeDispatcher<LinearFilter, T>::call, ResizeDispatcher<CubicFilter, T>::call
235+
ResizeDispatcher<PointFilter, T>::call,
236+
ResizeDispatcher<LinearFilter, T>::call,
237+
ResizeDispatcher<CubicFilter, T>::call,
238+
ResizeDispatcher<AreaFilter, T>::call
180239
};
181240

182241
callers[interpolation](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(srcWhole), xoff, yoff, fx, fy,

modules/gpu/src/opencv2/gpu/device/filters.hpp

Lines changed: 108 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ namespace cv { namespace gpu { namespace device
5555
typedef typename Ptr2D::elem_type elem_type;
5656
typedef float index_type;
5757

58-
explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_) : src(src_) {}
58+
explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f) : src(src_) {}
5959

6060
__device__ __forceinline__ elem_type operator ()(float y, float x) const
6161
{
@@ -70,7 +70,7 @@ namespace cv { namespace gpu { namespace device
7070
typedef typename Ptr2D::elem_type elem_type;
7171
typedef float index_type;
7272

73-
explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_) : src(src_) {}
73+
explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f) : src(src_) {}
7474

7575
__device__ __forceinline__ elem_type operator ()(float y, float x) const
7676
{
@@ -107,7 +107,7 @@ namespace cv { namespace gpu { namespace device
107107
typedef float index_type;
108108
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
109109

110-
explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_) : src(src_) {}
110+
explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f) : src(src_) {}
111111

112112
static __device__ __forceinline__ float bicubicCoeff(float x_)
113113
{
@@ -154,6 +154,111 @@ namespace cv { namespace gpu { namespace device
154154

155155
const Ptr2D src;
156156
};
157+
// for integer scaling
158+
template <typename Ptr2D> struct IntegerAreaFilter
159+
{
160+
typedef typename Ptr2D::elem_type elem_type;
161+
typedef float index_type;
162+
163+
explicit __host__ __device__ __forceinline__ IntegerAreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
164+
: src(src_), scale_x(scale_x_), scale_y(scale_y_), scale(1.f / (scale_x * scale_y)) {}
165+
166+
__device__ __forceinline__ elem_type operator ()(float y, float x) const
167+
{
168+
float fsx1 = x * scale_x;
169+
float fsx2 = fsx1 + scale_x;
170+
171+
int sx1 = __float2int_ru(fsx1);
172+
int sx2 = __float2int_rd(fsx2);
173+
174+
float fsy1 = y * scale_y;
175+
float fsy2 = fsy1 + scale_y;
176+
177+
int sy1 = __float2int_ru(fsy1);
178+
int sy2 = __float2int_rd(fsy2);
179+
180+
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
181+
work_type out = VecTraits<work_type>::all(0.f);
182+
183+
for(int dy = sy1; dy < sy2; ++dy)
184+
for(int dx = sx1; dx < sx2; ++dx)
185+
{
186+
out = out + src(dy, dx) * scale;
187+
}
188+
189+
return saturate_cast<elem_type>(out);
190+
}
191+
192+
const Ptr2D src;
193+
float scale_x, scale_y ,scale;
194+
};
195+
196+
template <typename Ptr2D> struct AreaFilter
197+
{
198+
typedef typename Ptr2D::elem_type elem_type;
199+
typedef float index_type;
200+
201+
explicit __host__ __device__ __forceinline__ AreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
202+
: src(src_), scale_x(scale_x_), scale_y(scale_y_){}
203+
204+
__device__ __forceinline__ elem_type operator ()(float y, float x) const
205+
{
206+
float fsx1 = x * scale_x;
207+
float fsx2 = fsx1 + scale_x;
208+
209+
int sx1 = __float2int_ru(fsx1);
210+
int sx2 = __float2int_rd(fsx2);
211+
212+
float fsy1 = y * scale_y;
213+
float fsy2 = fsy1 + scale_y;
214+
215+
int sy1 = __float2int_ru(fsy1);
216+
int sy2 = __float2int_rd(fsy2);
217+
218+
float scale = 1.f / (fminf(scale_x, src.width - fsx1) * fminf(scale_y, src.height - fsy1));
219+
220+
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
221+
work_type out = VecTraits<work_type>::all(0.f);
222+
223+
for (int dy = sy1; dy < sy2; ++dy)
224+
{
225+
for (int dx = sx1; dx < sx2; ++dx)
226+
out = out + src(dy, dx) * scale;
227+
228+
if (sx1 > fsx1)
229+
out = out + src(dy, (sx1 -1) ) * ((sx1 - fsx1) * scale);
230+
231+
if (sx2 < fsx2)
232+
out = out + src(dy, sx2) * ((fsx2 -sx2) * scale);
233+
}
234+
235+
if (sy1 > fsy1)
236+
for (int dx = sx1; dx < sx2; ++dx)
237+
out = out + src( (sy1 - 1) , dx) * ((sy1 -fsy1) * scale);
238+
239+
if (sy2 < fsy2)
240+
for (int dx = sx1; dx < sx2; ++dx)
241+
out = out + src(sy2, dx) * ((fsy2 -sy2) * scale);
242+
243+
if ((sy1 > fsy1) && (sx1 > fsx1))
244+
out = out + src( (sy1 - 1) , (sx1 - 1)) * ((sy1 -fsy1) * (sx1 -fsx1) * scale);
245+
246+
if ((sy1 > fsy1) && (sx2 < fsx2))
247+
out = out + src( (sy1 - 1) , sx2) * ((sy1 -fsy1) * (fsx2 -sx2) * scale);
248+
249+
if ((sy2 < fsy2) && (sx2 < fsx2))
250+
out = out + src(sy2, sx2) * ((fsy2 -sy2) * (fsx2 -sx2) * scale);
251+
252+
if ((sy2 < fsy2) && (sx1 > fsx1))
253+
out = out + src(sy2, (sx1 - 1)) * ((fsy2 -sy2) * (sx1 -fsx1) * scale);
254+
255+
return saturate_cast<elem_type>(out);
256+
}
257+
258+
const Ptr2D src;
259+
float scale_x, scale_y;
260+
int width, haight;
261+
};
157262
}}} // namespace cv { namespace gpu { namespace device
158263

159264
#endif // __OPENCV_GPU_FILTERS_HPP__

modules/gpu/src/opencv2/gpu/device/vec_traits.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -221,15 +221,15 @@ namespace cv { namespace gpu { namespace device
221221

222222
template<> struct VecTraits<char>
223223
{
224-
typedef char elem_type;
224+
typedef char elem_type;
225225
enum {cn=1};
226226
static __device__ __host__ __forceinline__ char all(char v) {return v;}
227227
static __device__ __host__ __forceinline__ char make(char x) {return x;}
228228
static __device__ __host__ __forceinline__ char make(const char* x) {return *x;}
229229
};
230230
template<> struct VecTraits<schar>
231231
{
232-
typedef schar elem_type;
232+
typedef schar elem_type;
233233
enum {cn=1};
234234
static __device__ __host__ __forceinline__ schar all(schar v) {return v;}
235235
static __device__ __host__ __forceinline__ schar make(schar x) {return x;}

modules/gpu/src/resize.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,8 @@ namespace cv { namespace gpu { namespace device
6161
void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)
6262
{
6363
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
64-
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
64+
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR
65+
|| interpolation == INTER_CUBIC || interpolation == INTER_AREA);
6566
CV_Assert(!(dsize == Size()) || (fx > 0 && fy > 0));
6667

6768
if (dsize == Size())

modules/gpu/test/test_resize.cpp

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@
4848

4949
namespace
5050
{
51-
template <typename T, template <typename> class Interpolator> void resizeImpl(const cv::Mat& src, cv::Mat& dst, double fx, double fy)
51+
template <typename T, template <typename> class Interpolator>
52+
void resizeImpl(const cv::Mat& src, cv::Mat& dst, double fx, double fy)
5253
{
5354
const int cn = src.channels();
5455

@@ -156,6 +157,51 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Resize, testing::Combine(
156157
testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
157158
WHOLE_SUBMAT));
158159

160+
161+
/////////////////
162+
PARAM_TEST_CASE(ResizeArea, cv::gpu::DeviceInfo, cv::Size, MatType, double, Interpolation, UseRoi)
163+
{
164+
cv::gpu::DeviceInfo devInfo;
165+
cv::Size size;
166+
double coeff;
167+
int interpolation;
168+
int type;
169+
bool useRoi;
170+
171+
virtual void SetUp()
172+
{
173+
devInfo = GET_PARAM(0);
174+
size = GET_PARAM(1);
175+
type = GET_PARAM(2);
176+
coeff = GET_PARAM(3);
177+
interpolation = GET_PARAM(4);
178+
useRoi = GET_PARAM(5);
179+
180+
cv::gpu::setDevice(devInfo.deviceID());
181+
}
182+
};
183+
184+
TEST_P(ResizeArea, Accuracy)
185+
{
186+
cv::Mat src = randomMat(size, type);
187+
188+
cv::gpu::GpuMat dst = createMat(cv::Size(cv::saturate_cast<int>(src.cols * coeff), cv::saturate_cast<int>(src.rows * coeff)), type, useRoi);
189+
cv::gpu::resize(loadMat(src, useRoi), dst, cv::Size(), coeff, coeff, interpolation);
190+
191+
cv::Mat dst_cpu;
192+
cv::resize(src, dst_cpu, cv::Size(), coeff, coeff, interpolation);
193+
194+
EXPECT_MAT_NEAR(dst_cpu, dst, src.depth() == CV_32F ? 1e-2 : 1.0);
195+
}
196+
197+
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ResizeArea, testing::Combine(
198+
ALL_DEVICES,
199+
DIFFERENT_SIZES,
200+
testing::Values(MatType(CV_8UC3), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
201+
testing::Values(/*0.3,*/0.5),
202+
testing::Values(Interpolation(cv::INTER_AREA)),
203+
WHOLE_SUBMAT));
204+
159205
///////////////////////////////////////////////////////////////////
160206
// Test NPP
161207

modules/gpu/test/utility.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ IMPLEMENT_PARAM_CLASS(Channels, int)
277277

278278
CV_ENUM(NormCode, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_TYPE_MASK, cv::NORM_RELATIVE, cv::NORM_MINMAX)
279279

280-
CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC)
280+
CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::INTER_AREA)
281281

282282
CV_ENUM(BorderType, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP)
283283
#define ALL_BORDER_TYPES testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP))

0 commit comments

Comments
 (0)