Skip to content

Commit 132416e

Browse files
committed
It is unnecessary to use fma() when no scaling is applied.
Signed-off-by: Yan Wang <yan.wang@linux.intel.com>
1 parent dac071e commit 132416e

File tree

2 files changed

+12
-3
lines changed

2 files changed

+12
-3
lines changed

modules/core/src/opencl/convert.cl

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,10 @@
5353

5454
__kernel void convertTo(__global const uchar * srcptr, int src_step, int src_offset,
5555
__global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
56-
WT alpha, WT beta, int rowsPerWI)
56+
#ifndef NO_SCALE
57+
WT alpha, WT beta,
58+
#endif
59+
int rowsPerWI)
5760
{
5861
int x = get_global_id(0);
5962
int y0 = get_global_id(1) * rowsPerWI;
@@ -68,7 +71,11 @@ __kernel void convertTo(__global const uchar * srcptr, int src_step, int src_off
6871
__global const srcT * src = (__global const srcT *)(srcptr + src_index);
6972
__global dstT * dst = (__global dstT *)(dstptr + dst_index);
7073

74+
#ifdef NO_SCALE
75+
dst[0] = convertToDT(src[0]);
76+
#else
7177
dst[0] = convertToDT(fma(convertToWT(src[0]), alpha, beta));
78+
#endif
7279
}
7380
}
7481
}

modules/core/src/umatrix.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -746,7 +746,7 @@ void UMat::convertTo(OutputArray _dst, int _type, double alpha, double beta) con
746746
ocl::typeToStr(sdepth), ocl::typeToStr(wdepth), ocl::typeToStr(ddepth),
747747
ocl::convertTypeStr(sdepth, wdepth, 1, cvt[0]),
748748
ocl::convertTypeStr(wdepth, ddepth, 1, cvt[1]),
749-
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
749+
doubleSupport ? " -D DOUBLE_SUPPORT" : "", noScale ? " -D NO_SCALE" : ""));
750750
if (!k.empty())
751751
{
752752
UMat src = *this;
@@ -757,7 +757,9 @@ void UMat::convertTo(OutputArray _dst, int _type, double alpha, double beta) con
757757
ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
758758
dstarg = ocl::KernelArg::WriteOnly(dst, cn);
759759

760-
if (wdepth == CV_32F)
760+
if (noScale)
761+
k.args(srcarg, dstarg, rowsPerWI);
762+
else if (wdepth == CV_32F)
761763
k.args(srcarg, dstarg, alphaf, betaf, rowsPerWI);
762764
else
763765
k.args(srcarg, dstarg, alpha, beta, rowsPerWI);

0 commit comments

Comments
 (0)