Skip to content

Commit 1769888

Browse files
lidavidm and ianmcook
authored and committed
ARROW-12843: [C++][R] Implement is_inf kernel
Closes apache#10375 from lidavidm/arrow-12843

Lead-authored-by: David Li <li.davidm96@gmail.com>
Co-authored-by: Ian Cook <ianmcook@gmail.com>
Signed-off-by: Yibo Cai <yibo.cai@arm.com>
1 parent 861b5da commit 1769888

12 files changed

Lines changed: 245 additions & 28 deletions

File tree

cpp/src/arrow/compute/kernels/scalar_validity.cc

Lines changed: 52 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,20 @@ struct IsValidOperator {
6060
}
6161
};
6262

63+
struct IsFiniteOperator {
64+
template <typename OutType, typename InType>
65+
static constexpr OutType Call(KernelContext*, const InType& value, Status*) {
66+
return std::isfinite(value);
67+
}
68+
};
69+
70+
struct IsInfOperator {
71+
template <typename OutType, typename InType>
72+
static constexpr OutType Call(KernelContext*, const InType& value, Status*) {
73+
return std::isinf(value);
74+
}
75+
};
76+
6377
struct IsNullOperator {
6478
static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
6579
checked_cast<BooleanScalar*>(out)->value = !in.is_valid;
@@ -103,19 +117,38 @@ void MakeFunction(std::string name, const FunctionDoc* doc,
103117
DCHECK_OK(registry->AddFunction(std::move(func)));
104118
}
105119

106-
template <typename InType>
107-
void AddIsNanKernel(const std::shared_ptr<DataType>& ty, ScalarFunction* func) {
108-
DCHECK_OK(
109-
func->AddKernel({ty}, boolean(),
110-
applicator::ScalarUnary<BooleanType, InType, IsNanOperator>::Exec));
120+
template <typename InType, typename Op>
121+
void AddFloatValidityKernel(const std::shared_ptr<DataType>& ty, ScalarFunction* func) {
122+
DCHECK_OK(func->AddKernel({ty}, boolean(),
123+
applicator::ScalarUnary<BooleanType, InType, Op>::Exec));
124+
}
125+
126+
std::shared_ptr<ScalarFunction> MakeIsFiniteFunction(std::string name,
127+
const FunctionDoc* doc) {
128+
auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
129+
130+
AddFloatValidityKernel<FloatType, IsFiniteOperator>(float32(), func.get());
131+
AddFloatValidityKernel<DoubleType, IsFiniteOperator>(float64(), func.get());
132+
133+
return func;
134+
}
135+
136+
std::shared_ptr<ScalarFunction> MakeIsInfFunction(std::string name,
137+
const FunctionDoc* doc) {
138+
auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
139+
140+
AddFloatValidityKernel<FloatType, IsInfOperator>(float32(), func.get());
141+
AddFloatValidityKernel<DoubleType, IsInfOperator>(float64(), func.get());
142+
143+
return func;
111144
}
112145

113146
// Creates the unary scalar function called `name` (documented by `doc`), adds
// IsNanOperator kernels for float32 and float64 inputs, and returns it.
std::shared_ptr<ScalarFunction> MakeIsNanFunction(std::string name,
114147
const FunctionDoc* doc) {
115148
auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
116149

117-
AddIsNanKernel<FloatType>(float32(), func.get());
118-
AddIsNanKernel<DoubleType>(float64(), func.get());
150+
AddFloatValidityKernel<FloatType, IsNanOperator>(float32(), func.get());
151+
AddFloatValidityKernel<DoubleType, IsNanOperator>(float64(), func.get());
119152

120153
return func;
121154
}
@@ -159,6 +192,16 @@ const FunctionDoc is_valid_doc(
159192
"Return true if non-null",
160193
("For each input value, emit true iff the value is valid (non-null)."), {"values"});
161194

195+
// FunctionDoc handed to MakeIsFiniteFunction when "is_finite" is registered.
const FunctionDoc is_finite_doc(
196+
"Return true if value is finite",
197+
("For each input value, emit true iff the value is finite (not NaN, inf, or -inf)."),
198+
{"values"});
199+
200+
// FunctionDoc handed to MakeIsInfFunction when "is_inf" is registered.
const FunctionDoc is_inf_doc(
201+
"Return true if infinity",
202+
("For each input value, emit true iff the value is infinite (inf or -inf)."),
203+
{"values"});
204+
162205
const FunctionDoc is_null_doc("Return true if null",
163206
("For each input value, emit true iff the value is null."),
164207
{"values"});
@@ -177,6 +220,8 @@ void RegisterScalarValidity(FunctionRegistry* registry) {
177220
registry, MemAllocation::PREALLOCATE,
178221
/*can_write_into_slices=*/true);
179222

223+
DCHECK_OK(registry->AddFunction(MakeIsFiniteFunction("is_finite", &is_finite_doc)));
224+
DCHECK_OK(registry->AddFunction(MakeIsInfFunction("is_inf", &is_inf_doc)));
180225
DCHECK_OK(registry->AddFunction(MakeIsNanFunction("is_nan", &is_nan_doc)));
181226
}
182227

cpp/src/arrow/compute/kernels/scalar_validity_test.cc

Lines changed: 104 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,15 +88,107 @@ TEST_F(TestBooleanValidityKernels, ScalarIsNull) {
8888
CheckScalarUnary("is_null", MakeNullScalar(float64()), MakeScalar(true));
8989
}
9090

91+
// "is_finite" over float32 arrays: infinities and NaN map to false, nulls stay null.
TEST_F(TestFloatValidityKernels, FloatArrayIsFinite) {
  const auto ty = float32();

  // Input consisting solely of infinities
  {
    auto input = ArrayFromJSON(ty, "[Inf, -Inf, Inf, -Inf, Inf]");
    auto expected = ArrayFromJSON(boolean(), "[false, false, false, false, false]");
    CheckScalarUnary("is_finite", input, expected);
  }
  // Input without any infinity
  {
    auto input = ArrayFromJSON(ty, "[0.0, 1.0, 2.0, 3.0, NaN, null]");
    auto expected = ArrayFromJSON(boolean(), "[true, true, true, true, false, null]");
    CheckScalarUnary("is_finite", input, expected);
  }
  // Input mixing infinities with other values
  {
    auto input = ArrayFromJSON(ty, "[0.0, Inf, 2.0, -Inf, NaN, null]");
    auto expected = ArrayFromJSON(boolean(), "[true, false, true, false, false, null]");
    CheckScalarUnary("is_finite", input, expected);
  }
}
104+
105+
// "is_finite" over float64 arrays: infinities and NaN map to false, nulls stay null.
TEST_F(TestDoubleValidityKernels, DoubleArrayIsFinite) {
  const auto ty = float64();

  // Input consisting solely of infinities
  {
    auto input = ArrayFromJSON(ty, "[Inf, -Inf, Inf, -Inf, Inf]");
    auto expected = ArrayFromJSON(boolean(), "[false, false, false, false, false]");
    CheckScalarUnary("is_finite", input, expected);
  }
  // Input without any infinity
  {
    auto input = ArrayFromJSON(ty, "[0.0, 1.0, 2.0, 3.0, NaN, null]");
    auto expected = ArrayFromJSON(boolean(), "[true, true, true, true, false, null]");
    CheckScalarUnary("is_finite", input, expected);
  }
  // Input mixing infinities with other values
  {
    auto input = ArrayFromJSON(ty, "[0.0, Inf, 2.0, -Inf, NaN, null]");
    auto expected = ArrayFromJSON(boolean(), "[true, false, true, false, false, null]");
    CheckScalarUnary("is_finite", input, expected);
  }
}
118+
119+
// "is_finite" over float32 scalars: a null input yields a null boolean; NaN and
// both infinities yield false.
TEST_F(TestFloatValidityKernels, FloatScalarIsFinite) {
  const float inf = std::numeric_limits<float>::infinity();

  CheckScalarUnary("is_finite", MakeNullScalar(float32()), MakeNullScalar(boolean()));
  CheckScalarUnary("is_finite", MakeScalar(42.0f), MakeScalar(true));
  CheckScalarUnary("is_finite", MakeScalar(std::nanf("")), MakeScalar(false));
  CheckScalarUnary("is_finite", MakeScalar(inf), MakeScalar(false));
  CheckScalarUnary("is_finite", MakeScalar(-inf), MakeScalar(false));
}
128+
129+
// "is_finite" over float64 scalars: a null input yields a null boolean; NaN and
// both infinities yield false.
TEST_F(TestDoubleValidityKernels, DoubleScalarIsFinite) {
  const double inf = std::numeric_limits<double>::infinity();

  CheckScalarUnary("is_finite", MakeNullScalar(float64()), MakeNullScalar(boolean()));
  CheckScalarUnary("is_finite", MakeScalar(42.0), MakeScalar(true));
  CheckScalarUnary("is_finite", MakeScalar(std::nan("")), MakeScalar(false));
  CheckScalarUnary("is_finite", MakeScalar(inf), MakeScalar(false));
  CheckScalarUnary("is_finite", MakeScalar(-inf), MakeScalar(false));
}
138+
139+
// "is_inf" over float32 arrays: only +/-Inf map to true, NaN maps to false,
// nulls stay null.
TEST_F(TestFloatValidityKernels, FloatArrayIsInf) {
  const auto ty = float32();

  // Input consisting solely of infinities
  {
    auto input = ArrayFromJSON(ty, "[Inf, -Inf, Inf, -Inf, Inf]");
    auto expected = ArrayFromJSON(boolean(), "[true, true, true, true, true]");
    CheckScalarUnary("is_inf", input, expected);
  }
  // Input without any infinity
  {
    auto input = ArrayFromJSON(ty, "[0.0, 1.0, 2.0, 3.0, NaN, null]");
    auto expected = ArrayFromJSON(boolean(), "[false, false, false, false, false, null]");
    CheckScalarUnary("is_inf", input, expected);
  }
  // Input mixing infinities with other values
  {
    auto input = ArrayFromJSON(ty, "[0.0, Inf, 2.0, -Inf, NaN, null]");
    auto expected = ArrayFromJSON(boolean(), "[false, true, false, true, false, null]");
    CheckScalarUnary("is_inf", input, expected);
  }
}
150+
151+
// "is_inf" over float64 arrays: only +/-Inf map to true, NaN maps to false,
// nulls stay null.
TEST_F(TestDoubleValidityKernels, DoubleArrayIsInf) {
  const auto ty = float64();

  // Input consisting solely of infinities
  {
    auto input = ArrayFromJSON(ty, "[Inf, -Inf, Inf, -Inf, Inf]");
    auto expected = ArrayFromJSON(boolean(), "[true, true, true, true, true]");
    CheckScalarUnary("is_inf", input, expected);
  }
  // Input without any infinity
  {
    auto input = ArrayFromJSON(ty, "[0.0, 1.0, 2.0, 3.0, NaN, null]");
    auto expected = ArrayFromJSON(boolean(), "[false, false, false, false, false, null]");
    CheckScalarUnary("is_inf", input, expected);
  }
  // Input mixing infinities with other values
  {
    auto input = ArrayFromJSON(ty, "[0.0, Inf, 2.0, -Inf, NaN, null]");
    auto expected = ArrayFromJSON(boolean(), "[false, true, false, true, false, null]");
    CheckScalarUnary("is_inf", input, expected);
  }
}
162+
163+
// "is_inf" over float32 scalars: a null input yields a null boolean; only the
// two infinities yield true.
TEST_F(TestFloatValidityKernels, FloatScalarIsInf) {
  const float inf = std::numeric_limits<float>::infinity();

  CheckScalarUnary("is_inf", MakeNullScalar(float32()), MakeNullScalar(boolean()));
  CheckScalarUnary("is_inf", MakeScalar(42.0f), MakeScalar(false));
  CheckScalarUnary("is_inf", MakeScalar(std::nanf("")), MakeScalar(false));
  CheckScalarUnary("is_inf", MakeScalar(inf), MakeScalar(true));
  CheckScalarUnary("is_inf", MakeScalar(-inf), MakeScalar(true));
}
172+
173+
// "is_inf" over float64 scalars: a null input yields a null boolean; only the
// two infinities yield true.
TEST_F(TestDoubleValidityKernels, DoubleScalarIsInf) {
  const double inf = std::numeric_limits<double>::infinity();

  CheckScalarUnary("is_inf", MakeNullScalar(float64()), MakeNullScalar(boolean()));
  CheckScalarUnary("is_inf", MakeScalar(42.0), MakeScalar(false));
  CheckScalarUnary("is_inf", MakeScalar(std::nan("")), MakeScalar(false));
  CheckScalarUnary("is_inf", MakeScalar(inf), MakeScalar(true));
  CheckScalarUnary("is_inf", MakeScalar(-inf), MakeScalar(true));
}
182+
91183
// Checks "is_nan" on float32 arrays: an all-NaN input, an input with no NaN,
// and an input mixing NaN with other values and a null.
TEST_F(TestFloatValidityKernels, FloatArrayIsNan) {
92184
// All NaN
93185
CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[NaN, NaN, NaN, NaN, NaN]"),
94186
ArrayFromJSON(boolean(), "[true, true, true, true, true]"));
95187
// No NaN
96-
CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[0.0, 1.0, 2.0, 3.0, 4.0, null]"),
188+
CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[0.0, 1.0, 2.0, 3.0, Inf, null]"),
97189
ArrayFromJSON(boolean(), "[false, false, false, false, false, null]"));
98190
// Some NaNs
99-
CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[0.0, NaN, 2.0, NaN, 4.0, null]"),
191+
CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[0.0, NaN, 2.0, NaN, Inf, null]"),
100192
ArrayFromJSON(boolean(), "[false, true, false, true, false, null]"));
101193
}
102194

@@ -105,23 +197,31 @@ TEST_F(TestDoubleValidityKernels, DoubleArrayIsNan) {
105197
CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[NaN, NaN, NaN, NaN, NaN]"),
106198
ArrayFromJSON(boolean(), "[true, true, true, true, true]"));
107199
// No NaN
108-
CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[0.0, 1.0, 2.0, 3.0, 4.0, null]"),
200+
CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[0.0, 1.0, 2.0, 3.0, Inf, null]"),
109201
ArrayFromJSON(boolean(), "[false, false, false, false, false, null]"));
110202
// Some NaNs
111-
CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[0.0, NaN, 2.0, NaN, 4.0, null]"),
203+
CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[0.0, NaN, 2.0, NaN, Inf, null]"),
112204
ArrayFromJSON(boolean(), "[false, true, false, true, false, null]"));
113205
}
114206

115207
// "is_nan" over float32 scalars: a null input yields a null boolean; only NaN
// yields true, the infinities do not.
TEST_F(TestFloatValidityKernels, FloatScalarIsNan) {
  const float inf = std::numeric_limits<float>::infinity();

  CheckScalarUnary("is_nan", MakeNullScalar(float32()), MakeNullScalar(boolean()));
  CheckScalarUnary("is_nan", MakeScalar(42.0f), MakeScalar(false));
  CheckScalarUnary("is_nan", MakeScalar(std::nanf("")), MakeScalar(true));
  CheckScalarUnary("is_nan", MakeScalar(inf), MakeScalar(false));
  CheckScalarUnary("is_nan", MakeScalar(-inf), MakeScalar(false));
}
120216

121217
// "is_nan" over float64 scalars: a null input yields a null boolean; only NaN
// yields true, the infinities do not.
TEST_F(TestDoubleValidityKernels, DoubleScalarIsNan) {
  const double inf = std::numeric_limits<double>::infinity();

  CheckScalarUnary("is_nan", MakeNullScalar(float64()), MakeNullScalar(boolean()));
  CheckScalarUnary("is_nan", MakeScalar(42.0), MakeScalar(false));
  CheckScalarUnary("is_nan", MakeScalar(std::nan("")), MakeScalar(true));
  CheckScalarUnary("is_nan", MakeScalar(inf), MakeScalar(false));
  CheckScalarUnary("is_nan", MakeScalar(-inf), MakeScalar(false));
}
126226

127227
} // namespace compute

docs/source/cpp/compute.rst

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -648,31 +648,40 @@ Structural transforms
648648
+==========================+============+================================================+=====================+=========+
649649
| fill_null | Binary | Boolean, Null, Numeric, Temporal, String-like | Input type | \(1) |
650650
+--------------------------+------------+------------------------------------------------+---------------------+---------+
651-
| is_nan | Unary | Float, Double | Boolean | \(2) |
651+
| is_finite | Unary | Float, Double | Boolean | \(2) |
652652
+--------------------------+------------+------------------------------------------------+---------------------+---------+
653-
| is_null | Unary | Any | Boolean | \(3) |
653+
| is_inf | Unary | Float, Double | Boolean | \(3) |
654654
+--------------------------+------------+------------------------------------------------+---------------------+---------+
655-
| is_valid | Unary | Any | Boolean | \(4) |
655+
| is_nan | Unary | Float, Double | Boolean | \(4) |
656656
+--------------------------+------------+------------------------------------------------+---------------------+---------+
657-
| list_value_length | Unary | List-like | Int32 or Int64 | \(5) |
657+
| is_null | Unary | Any | Boolean | \(5) |
658658
+--------------------------+------------+------------------------------------------------+---------------------+---------+
659-
| project | Varargs | Any | Struct | \(6) |
659+
| is_valid | Unary | Any | Boolean | \(6) |
660+
+--------------------------+------------+------------------------------------------------+---------------------+---------+
661+
| list_value_length | Unary | List-like | Int32 or Int64 | \(7) |
662+
+--------------------------+------------+------------------------------------------------+---------------------+---------+
663+
| project | Varargs | Any | Struct | \(8) |
660664
+--------------------------+------------+------------------------------------------------+---------------------+---------+
661665

662666
* \(1) First input must be an array, second input a scalar of the same type.
663667
Output is an array of the same type as the inputs, and with the same values
664668
as the first input, except for nulls replaced with the second input value.
665669

666-
* \(2) Output is true iff the corresponding input element is NaN.
670+
* \(2) Output is true iff the corresponding input element is finite (not Infinity,
671+
-Infinity, or NaN).
672+
673+
* \(3) Output is true iff the corresponding input element is Infinity/-Infinity.
674+
675+
* \(4) Output is true iff the corresponding input element is NaN.
667676

668-
* \(3) Output is true iff the corresponding input element is null.
677+
* \(5) Output is true iff the corresponding input element is null.
669678

670-
* \(4) Output is true iff the corresponding input element is non-null.
679+
* \(6) Output is true iff the corresponding input element is non-null.
671680

672-
* \(5) Each output element is the length of the corresponding input element
681+
* \(7) Each output element is the length of the corresponding input element
673682
(null if input is null). Output type is Int32 for List, Int64 for LargeList.
674683

675-
* \(6) The output struct's field types are the types of its arguments. The
684+
* \(8) The output struct's field types are the types of its arguments. The
676685
field names are specified using an instance of :struct:`ProjectOptions`.
677686
The output shape will be scalar if all inputs are scalar, otherwise any
678687
scalars will be broadcast to arrays.

docs/source/python/api/compute.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,9 @@ Structural Transforms
222222

223223
binary_length
224224
fill_null
225+
is_finite
226+
is_inf
227+
is_nan
225228
is_null
226229
is_valid
227230
list_value_length

r/NAMESPACE

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,10 @@ S3method(head,ArrowDatum)
4646
S3method(head,ArrowTabular)
4747
S3method(head,Dataset)
4848
S3method(head,arrow_dplyr_query)
49+
S3method(is.finite,ArrowDatum)
50+
S3method(is.infinite,ArrowDatum)
4951
S3method(is.na,ArrowDatum)
5052
S3method(is.na,Expression)
51-
S3method(is.na,Scalar)
5253
S3method(is.nan,ArrowDatum)
5354
S3method(is_in,ArrowDatum)
5455
S3method(is_in,default)

r/R/arrow-datum.R

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,20 @@ ArrowDatum <- R6Class("ArrowDatum", inherit = ArrowObject,
3232
#' @export
3333
length.ArrowDatum <- function(x) x$length()
3434

35+
#' @export
36+
is.finite.ArrowDatum <- function(x) {
  finite <- call_function("is_finite", x)
  # base::is.finite() gives FALSE for NA_real_, so combine the kernel result
  # with a "not missing" mask to get the same answer
  not_missing <- !is.na(finite)
  finite & not_missing
}
41+
42+
#' @export
43+
is.infinite.ArrowDatum <- function(x) {
  infinite <- call_function("is_inf", x)
  # base::is.infinite() gives FALSE for NA_real_, so combine the kernel result
  # with a "not missing" mask to get the same answer
  not_missing <- !is.na(infinite)
  infinite & not_missing
}
48+
3549
#' @export
3650
is.na.ArrowDatum <- function(x) call_function("is_null", x)
3751

r/R/dplyr-functions.R

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,18 @@ nse_funcs$between <- function(x, left, right) {
7272
x >= left & x <= right
7373
}
7474

75+
nse_funcs$is.finite <- function(x) {
  finite_expr <- Expression$create("is_finite", x)
  # base::is.finite() gives FALSE for NA_real_, so combine the expression with
  # a "not missing" mask to get the same answer
  not_missing <- !nse_funcs$is.na(finite_expr)
  finite_expr & not_missing
}
80+
81+
nse_funcs$is.infinite <- function(x) {
  infinite_expr <- Expression$create("is_inf", x)
  # base::is.infinite() gives FALSE for NA_real_, so combine the expression
  # with a "not missing" mask to get the same answer
  not_missing <- !nse_funcs$is.na(infinite_expr)
  infinite_expr & not_missing
}
86+
7587
# as.* type casting functions
7688
# as.factor() is mapped in expression.R
7789
nse_funcs$as.character <- function(x) {

r/R/expression.R

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,7 @@
2222
"as.factor" = "dictionary_encode",
2323
"is.na" = "is_null",
2424
"is.nan" = "is_nan",
25-
# nchar is defined in dplyr.R because it is more complex
26-
# "nchar" = "utf8_length",
25+
# nchar is defined in dplyr-functions.R
2726
"tolower" = "utf8_lower",
2827
"toupper" = "utf8_upper",
2928
# stringr spellings of those

r/R/scalar.R

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,5 @@ StructScalar <- R6Class("StructScalar",
7272
#' @export
7373
length.Scalar <- function(x) 1L
7474

75-
#' @export
76-
is.na.Scalar <- function(x) !x$is_valid
77-
7875
#' @export
7976
sort.Scalar <- function(x, decreasing = FALSE, ...) x

r/tests/testthat/test-chunked-array.R

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,22 @@ test_that("ChunkedArray handles !!! splicing", {
108108
expect_equal(x$num_chunks, 3L)
109109
})
110110

111+
test_that("ChunkedArray handles Inf", {
  data <- list(c(Inf, 2:10), c(1:3, Inf, 5L), 1:10)
  x <- chunked_array(!!!data)

  # Type, chunking, length and values of the resulting ChunkedArray
  expect_equal(x$type, float64())
  expect_equal(x$num_chunks, 3L)
  expect_equal(length(x), 25L)
  expect_equal(as.vector(x), c(c(Inf, 2:10), c(1:3, Inf, 5), 1:10))

  # is.infinite() has to match base R for one chunk and for the whole array
  second_chunk <- x$chunks[[2]]
  expect_equal(as.vector(is.infinite(second_chunk)), is.infinite(data[[2]]))
  expect_equal(
    as.vector(is.infinite(x)),
    unlist(lapply(data, is.infinite))
  )
})
126+
111127
test_that("ChunkedArray handles NA", {
112128
data <- list(1:10, c(NA, 2:10), c(1:3, NA, 5L))
113129
x <- chunked_array(!!!data)

0 commit comments

Comments
 (0)