Skip to content

Commit 1214083

Browse files
authored
ARROW-17135: [C++] Reduce code size in compute/kernels/scalar_compare.cc (apache#13654)
This "leaner" implementation reduces the generated code size of this C++ file from 2307768 bytes to 1192608 bytes in gcc 10.3.0. The benchmarks are also faster (on my avx2 laptop):

before

```
-----------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations UserCounters...
-----------------------------------------------------------------------------------------------
GreaterArrayArrayInt64/32768/10000 32.1 us 32.1 us 21533 items_per_second=1020.16M/s null_percent=0.01 size=32.768k
GreaterArrayArrayInt64/32768/100 32.1 us 32.1 us 21603 items_per_second=1019.27M/s null_percent=1 size=32.768k
GreaterArrayArrayInt64/32768/10 32.1 us 32.1 us 21479 items_per_second=1020.82M/s null_percent=10 size=32.768k
GreaterArrayArrayInt64/32768/2 32.0 us 32.0 us 21468 items_per_second=1023.12M/s null_percent=50 size=32.768k
GreaterArrayArrayInt64/32768/1 32.3 us 32.3 us 21720 items_per_second=1013.44M/s null_percent=100 size=32.768k
GreaterArrayArrayInt64/32768/0 31.6 us 31.6 us 21828 items_per_second=1036.94M/s null_percent=0 size=32.768k
GreaterArrayScalarInt64/32768/10000 20.8 us 20.8 us 33461 items_per_second=1.57238G/s null_percent=0.01 size=32.768k
GreaterArrayScalarInt64/32768/100 20.9 us 20.9 us 33625 items_per_second=1.56611G/s null_percent=1 size=32.768k
GreaterArrayScalarInt64/32768/10 20.8 us 20.8 us 33553 items_per_second=1.57338G/s null_percent=10 size=32.768k
GreaterArrayScalarInt64/32768/2 20.9 us 20.9 us 33348 items_per_second=1.5687G/s null_percent=50 size=32.768k
GreaterArrayScalarInt64/32768/1 20.9 us 20.9 us 33419 items_per_second=1.56879G/s null_percent=100 size=32.768k
GreaterArrayScalarInt64/32768/0 20.5 us 20.5 us 34116 items_per_second=1.59837G/s null_percent=0 size=32.768k
```

after

```
-----------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations UserCounters...
-----------------------------------------------------------------------------------------------
GreaterArrayArrayInt64/32768/10000 18.1 us 18.1 us 38751 items_per_second=1.81199G/s null_percent=0.01 size=32.768k
GreaterArrayArrayInt64/32768/100 17.5 us 17.5 us 39374 items_per_second=1.86821G/s null_percent=1 size=32.768k
GreaterArrayArrayInt64/32768/10 19.0 us 19.0 us 33941 items_per_second=1.72066G/s null_percent=10 size=32.768k
GreaterArrayArrayInt64/32768/2 18.0 us 18.0 us 39589 items_per_second=1.81817G/s null_percent=50 size=32.768k
GreaterArrayArrayInt64/32768/1 18.1 us 18.1 us 39061 items_per_second=1.80719G/s null_percent=100 size=32.768k
GreaterArrayArrayInt64/32768/0 17.5 us 17.5 us 39813 items_per_second=1.87031G/s null_percent=0 size=32.768k
GreaterArrayScalarInt64/32768/10000 16.3 us 16.3 us 42281 items_per_second=2.01525G/s null_percent=0.01 size=32.768k
GreaterArrayScalarInt64/32768/100 16.5 us 16.5 us 42266 items_per_second=1.98195G/s null_percent=1 size=32.768k
GreaterArrayScalarInt64/32768/10 16.5 us 16.5 us 41872 items_per_second=1.98615G/s null_percent=10 size=32.768k
GreaterArrayScalarInt64/32768/2 16.3 us 16.3 us 42130 items_per_second=2.00447G/s null_percent=50 size=32.768k
GreaterArrayScalarInt64/32768/1 16.2 us 16.2 us 42391 items_per_second=2.02296G/s null_percent=100 size=32.768k
GreaterArrayScalarInt64/32768/0 15.9 us 15.9 us 43498 items_per_second=2.0614G/s null_percent=0 size=32.768k
```

Authored-by: Wes McKinney <wesm@apache.org>
Signed-off-by: Wes McKinney <wesm@apache.org>
1 parent 8a4d611 commit 1214083

6 files changed

Lines changed: 226 additions & 71 deletions

File tree

cpp/src/arrow/compute/kernels/codegen_internal.cc

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,6 @@ namespace arrow {
2929
namespace compute {
3030
namespace internal {
3131

32-
Status ExecFail(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
33-
return Status::NotImplemented("This kernel is malformed");
34-
}
35-
3632
const std::vector<std::shared_ptr<DataType>>& ExampleParametricTypes() {
3733
static DataTypeVector example_parametric_types = {
3834
decimal128(12, 2),

cpp/src/arrow/compute/kernels/codegen_internal.h

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -964,8 +964,6 @@ struct FailFunctor<VectorKernel::ChunkedExec> {
964964
}
965965
};
966966

967-
Status ExecFail(KernelContext* ctx, const ExecSpan& batch, ExecResult* out);
968-
969967
// GD for numeric types (integer and floating point)
970968
template <template <typename...> class Generator, typename Type0,
971969
typename KernelType = ArrayKernelExec, typename... Args>
@@ -1009,7 +1007,7 @@ ArrayKernelExec GenerateFloatingPoint(detail::GetTypeId get_id) {
10091007
return Generator<Type0, DoubleType, Args...>::Exec;
10101008
default:
10111009
DCHECK(false);
1012-
return ExecFail;
1010+
return nullptr;
10131011
}
10141012
}
10151013

@@ -1037,7 +1035,7 @@ ArrayKernelExec GenerateInteger(detail::GetTypeId get_id) {
10371035
return Generator<Type0, UInt64Type, Args...>::Exec;
10381036
default:
10391037
DCHECK(false);
1040-
return ExecFail;
1038+
return nullptr;
10411039
}
10421040
}
10431041

@@ -1068,7 +1066,7 @@ ArrayKernelExec GeneratePhysicalInteger(detail::GetTypeId get_id) {
10681066
return Generator<Type0, UInt64Type, Args...>::Exec;
10691067
default:
10701068
DCHECK(false);
1071-
return ExecFail;
1069+
return nullptr;
10721070
}
10731071
}
10741072

@@ -1104,8 +1102,9 @@ KernelType ArithmeticExecFromOp(detail::GetTypeId get_id) {
11041102
}
11051103
}
11061104

1107-
template <template <typename... Args> class Generator, typename... Args>
1108-
ArrayKernelExec GeneratePhysicalNumeric(detail::GetTypeId get_id) {
1105+
template <typename ReturnType, template <typename... Args> class Generator,
1106+
typename... Args>
1107+
ReturnType GeneratePhysicalNumericGeneric(detail::GetTypeId get_id) {
11091108
switch (get_id.id) {
11101109
case Type::INT8:
11111110
return Generator<Int8Type, Args...>::Exec;
@@ -1135,9 +1134,13 @@ ArrayKernelExec GeneratePhysicalNumeric(detail::GetTypeId get_id) {
11351134
return Generator<DoubleType, Args...>::Exec;
11361135
default:
11371136
DCHECK(false);
1138-
return ExecFail;
1137+
return nullptr;
11391138
}
11401139
}
1140+
template <template <typename... Args> class Generator, typename... Args>
1141+
ArrayKernelExec GeneratePhysicalNumeric(detail::GetTypeId get_id) {
1142+
return GeneratePhysicalNumericGeneric<ArrayKernelExec, Generator, Args...>(get_id);
1143+
}
11411144

11421145
// Generate a kernel given a templated functor for decimal types
11431146
template <template <typename... Args> class Generator, typename... Args>
@@ -1149,7 +1152,7 @@ ArrayKernelExec GenerateDecimalToDecimal(detail::GetTypeId get_id) {
11491152
return Generator<Decimal256Type, Args...>::Exec;
11501153
default:
11511154
DCHECK(false);
1152-
return ExecFail;
1155+
return nullptr;
11531156
}
11541157
}
11551158

@@ -1169,7 +1172,7 @@ ArrayKernelExec GenerateSignedInteger(detail::GetTypeId get_id) {
11691172
return Generator<Type0, Int64Type, Args...>::Exec;
11701173
default:
11711174
DCHECK(false);
1172-
return ExecFail;
1175+
return nullptr;
11731176
}
11741177
}
11751178

@@ -1249,7 +1252,7 @@ ArrayKernelExec GenerateVarBinaryToVarBinary(detail::GetTypeId get_id) {
12491252
return Generator<LargeStringType, Args...>::Exec;
12501253
default:
12511254
DCHECK(false);
1252-
return ExecFail;
1255+
return nullptr;
12531256
}
12541257
}
12551258

@@ -1270,7 +1273,7 @@ ArrayKernelExec GenerateVarBinaryBase(detail::GetTypeId get_id) {
12701273
return Generator<Type0, LargeBinaryType, Args...>::Exec;
12711274
default:
12721275
DCHECK(false);
1273-
return ExecFail;
1276+
return nullptr;
12741277
}
12751278
}
12761279

@@ -1288,7 +1291,7 @@ ArrayKernelExec GenerateVarBinary(detail::GetTypeId get_id) {
12881291
return Generator<Type0, LargeStringType, Args...>::Exec;
12891292
default:
12901293
DCHECK(false);
1291-
return ExecFail;
1294+
return nullptr;
12921295
}
12931296
}
12941297

@@ -1312,7 +1315,7 @@ ArrayKernelExec GenerateTemporal(detail::GetTypeId get_id) {
13121315
return Generator<Type0, TimestampType, Args...>::Exec;
13131316
default:
13141317
DCHECK(false);
1315-
return ExecFail;
1318+
return nullptr;
13161319
}
13171320
}
13181321

@@ -1328,7 +1331,7 @@ ArrayKernelExec GenerateDecimal(detail::GetTypeId get_id) {
13281331
return Generator<Type0, Decimal256Type, Args...>::Exec;
13291332
default:
13301333
DCHECK(false);
1331-
return ExecFail;
1334+
return nullptr;
13321335
}
13331336
}
13341337

cpp/src/arrow/compute/kernels/scalar_arithmetic.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1021,7 +1021,7 @@ ArrayKernelExec TypeAgnosticBitWiseExecFromOp(detail::GetTypeId get_id) {
10211021
return KernelGenerator<UInt64Type, UInt64Type, Op>::Exec;
10221022
default:
10231023
DCHECK(false);
1024-
return ExecFail;
1024+
return nullptr;
10251025
}
10261026
}
10271027

@@ -1046,7 +1046,7 @@ ArrayKernelExec ShiftExecFromOp(detail::GetTypeId get_id) {
10461046
return KernelGenerator<UInt64Type, UInt64Type, Op>::Exec;
10471047
default:
10481048
DCHECK(false);
1049-
return ExecFail;
1049+
return nullptr;
10501050
}
10511051
}
10521052

@@ -1059,7 +1059,7 @@ ArrayKernelExec GenerateArithmeticFloatingPoint(detail::GetTypeId get_id) {
10591059
return KernelGenerator<DoubleType, DoubleType, Op>::Exec;
10601060
default:
10611061
DCHECK(false);
1062-
return ExecFail;
1062+
return nullptr;
10631063
}
10641064
}
10651065

@@ -1188,7 +1188,7 @@ ArrayKernelExec GenerateArithmeticWithFixedIntOutType(detail::GetTypeId get_id)
11881188
return KernelGenerator<DoubleType, DoubleType, Op>::Exec;
11891189
default:
11901190
DCHECK(false);
1191-
return ExecFail;
1191+
return nullptr;
11921192
}
11931193
}
11941194

0 commit comments

Comments
 (0)