Skip to content

Commit 36bf7a4

Browse files
projjal authored and kszucs committed
ARROW-9898: [C++][Gandiva] Fix linking issue with castINT/FLOAT functions
Moving the castint/float functions to gdv_function_stubs outside of precompiled module.

Closes apache#8096 from projjal/castint and squashes the following commits:

85179a5 <Projjal Chanda> moved castInt to gdv_fn_stubs
c09077e <Projjal Chanda> fixed castfloat function
ddc429d <Projjal Chanda> added java test case
f666f54 <Projjal Chanda> fix error handling in castint

Authored-by: Projjal Chanda <iam@pchanda.com>
Signed-off-by: Praveen <praveen@dremio.com>
1 parent 3f96cc0 commit 36bf7a4

8 files changed

Lines changed: 442 additions & 162 deletions

File tree

cpp/src/gandiva/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -224,6 +224,7 @@ add_gandiva_test(internals-test
224224
like_holder_test.cc
225225
decimal_type_util_test.cc
226226
random_generator_holder_test.cc
227+
gdv_function_stubs_test.cc
227228
EXTRA_DEPENDENCIES
228229
LLVM::LLVM_INTERFACE
229230
EXTRA_INCLUDES

cpp/src/gandiva/function_registry_string.cc

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
// under the License.
1717

1818
#include "gandiva/function_registry_string.h"
19+
1920
#include "gandiva/function_registry_common.h"
2021

2122
namespace gandiva {
@@ -61,17 +62,26 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
6162
UNARY_SAFE_NULL_NEVER_BOOL_FN(isnull, {}),
6263
UNARY_SAFE_NULL_NEVER_BOOL_FN(isnotnull, {}),
6364

64-
UNARY_UNSAFE_NULL_IF_NULL(castINT, {}, utf8, int32),
65-
UNARY_UNSAFE_NULL_IF_NULL(castBIGINT, {}, utf8, int64),
66-
UNARY_UNSAFE_NULL_IF_NULL(castFLOAT4, {}, utf8, float32),
67-
UNARY_UNSAFE_NULL_IF_NULL(castFLOAT8, {}, utf8, float64),
68-
6965
NativeFunction("upper", {}, DataTypeVector{utf8()}, utf8(), kResultNullIfNull,
7066
"upper_utf8", NativeFunction::kNeedsContext),
7167

7268
NativeFunction("lower", {}, DataTypeVector{utf8()}, utf8(), kResultNullIfNull,
7369
"lower_utf8", NativeFunction::kNeedsContext),
7470

71+
NativeFunction("castINT", {}, DataTypeVector{utf8()}, int32(), kResultNullIfNull,
72+
"gdv_fn_castINT_utf8", NativeFunction::kNeedsContext),
73+
74+
NativeFunction("castBIGINT", {}, DataTypeVector{utf8()}, int64(), kResultNullIfNull,
75+
"gdv_fn_castBIGINT_utf8", NativeFunction::kNeedsContext),
76+
77+
NativeFunction("castFLOAT4", {}, DataTypeVector{utf8()}, float32(),
78+
kResultNullIfNull, "gdv_fn_castFLOAT4_utf8",
79+
NativeFunction::kNeedsContext),
80+
81+
NativeFunction("castFLOAT8", {}, DataTypeVector{utf8()}, float64(),
82+
kResultNullIfNull, "gdv_fn_castFLOAT8_utf8",
83+
NativeFunction::kNeedsContext),
84+
7585
NativeFunction("castVARCHAR", {}, DataTypeVector{utf8(), int64()}, utf8(),
7686
kResultNullIfNull, "castVARCHAR_utf8_int64",
7787
NativeFunction::kNeedsContext),

cpp/src/gandiva/gdv_function_stubs.cc

Lines changed: 60 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
#include <string>
2121
#include <vector>
2222

23+
#include "arrow/util/value_parsing.h"
2324
#include "gandiva/engine.h"
2425
#include "gandiva/exported_funcs.h"
2526
#include "gandiva/in_holder.h"
@@ -150,6 +151,37 @@ char* gdv_fn_dec_to_string(int64_t context, int64_t x_high, uint64_t x_low,
150151
memcpy(ret, dec_str.data(), *dec_str_len);
151152
return ret;
152153
}
154+
155+
#define CAST_NUMERIC_FROM_STRING(OUT_TYPE, ARROW_TYPE, TYPE_NAME) \
156+
GANDIVA_EXPORT \
157+
OUT_TYPE gdv_fn_cast##TYPE_NAME##_utf8(int64_t context, const char* data, \
158+
int32_t len) { \
159+
OUT_TYPE val = 0; \
160+
/* trim leading and trailing spaces */ \
161+
int32_t trimmed_len; \
162+
int32_t start = 0, end = len - 1; \
163+
while (start <= end && data[start] == ' ') { \
164+
++start; \
165+
} \
166+
while (end >= start && data[end] == ' ') { \
167+
--end; \
168+
} \
169+
trimmed_len = end - start + 1; \
170+
const char* trimmed_data = data + start; \
171+
if (!arrow::internal::ParseValue<ARROW_TYPE>(trimmed_data, trimmed_len, &val)) { \
172+
std::string err = \
173+
"Failed to cast the string " + std::string(data, len) + " to " #OUT_TYPE; \
174+
gdv_fn_context_set_error_msg(context, err.c_str()); \
175+
} \
176+
return val; \
177+
}
178+
179+
CAST_NUMERIC_FROM_STRING(int32_t, arrow::Int32Type, INT)
180+
CAST_NUMERIC_FROM_STRING(int64_t, arrow::Int64Type, BIGINT)
181+
CAST_NUMERIC_FROM_STRING(float, arrow::FloatType, FLOAT4)
182+
CAST_NUMERIC_FROM_STRING(double, arrow::DoubleType, FLOAT8)
183+
184+
#undef CAST_NUMERIC_FROM_STRING
153185
}
154186

155187
namespace gandiva {
@@ -277,6 +309,34 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const {
277309
args = {types->i64_type(), types->i32_type(), types->i1_type()};
278310
engine->AddGlobalMappingForFunc("gdv_fn_random_with_seed", types->double_type(), args,
279311
reinterpret_cast<void*>(gdv_fn_random_with_seed));
312+
313+
args = {types->i64_type(), // int64_t context_ptr
314+
types->i8_ptr_type(), // const char* data
315+
types->i32_type()}; // int32_t lenr
316+
317+
engine->AddGlobalMappingForFunc("gdv_fn_castINT_utf8", types->i32_type(), args,
318+
reinterpret_cast<void*>(gdv_fn_castINT_utf8));
319+
320+
args = {types->i64_type(), // int64_t context_ptr
321+
types->i8_ptr_type(), // const char* data
322+
types->i32_type()}; // int32_t lenr
323+
324+
engine->AddGlobalMappingForFunc("gdv_fn_castBIGINT_utf8", types->i64_type(), args,
325+
reinterpret_cast<void*>(gdv_fn_castBIGINT_utf8));
326+
327+
args = {types->i64_type(), // int64_t context_ptr
328+
types->i8_ptr_type(), // const char* data
329+
types->i32_type()}; // int32_t lenr
330+
331+
engine->AddGlobalMappingForFunc("gdv_fn_castFLOAT4_utf8", types->float_type(), args,
332+
reinterpret_cast<void*>(gdv_fn_castFLOAT4_utf8));
333+
334+
args = {types->i64_type(), // int64_t context_ptr
335+
types->i8_ptr_type(), // const char* data
336+
types->i32_type()}; // int32_t lenr
337+
338+
engine->AddGlobalMappingForFunc("gdv_fn_castFLOAT8_utf8", types->double_type(), args,
339+
reinterpret_cast<void*>(gdv_fn_castFLOAT8_utf8));
280340
}
281341

282342
} // namespace gandiva

cpp/src/gandiva/gdv_function_stubs.h

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,8 @@
1919

2020
#include <cstdint>
2121

22+
#include "gandiva/visibility.h"
23+
2224
/// Stub functions that can be accessed from LLVM.
2325
extern "C" {
2426

@@ -52,4 +54,16 @@ int32_t gdv_fn_dec_from_string(int64_t context, const char* in, int32_t in_lengt
5254

5355
char* gdv_fn_dec_to_string(int64_t context, int64_t x_high, uint64_t x_low,
5456
int32_t x_scale, int32_t* dec_str_len);
57+
58+
GANDIVA_EXPORT
59+
int32_t gdv_fn_castINT_utf8(int64_t context, const char* data, int32_t data_len);
60+
61+
GANDIVA_EXPORT
62+
int64_t gdv_fn_castBIGINT_utf8(int64_t context, const char* data, int32_t data_len);
63+
64+
GANDIVA_EXPORT
65+
float gdv_fn_castFLOAT4_utf8(int64_t context, const char* data, int32_t data_len);
66+
67+
GANDIVA_EXPORT
68+
double gdv_fn_castFLOAT8_utf8(int64_t context, const char* data, int32_t data_len);
5569
}
cpp/src/gandiva/gdv_function_stubs_test.cc

Lines changed: 163 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,163 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include "gandiva/gdv_function_stubs.h"
19+
20+
#include <gmock/gmock.h>
21+
#include <gtest/gtest.h>
22+
23+
#include "gandiva/execution_context.h"
24+
25+
namespace gandiva {
26+
27+
TEST(TestGdvFnStubs, TestCastINT) {
28+
gandiva::ExecutionContext ctx;
29+
30+
int64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);
31+
32+
EXPECT_EQ(gdv_fn_castINT_utf8(ctx_ptr, "-45", 3), -45);
33+
EXPECT_EQ(gdv_fn_castINT_utf8(ctx_ptr, "0", 1), 0);
34+
EXPECT_EQ(gdv_fn_castINT_utf8(ctx_ptr, "2147483647", 10), 2147483647);
35+
EXPECT_EQ(gdv_fn_castINT_utf8(ctx_ptr, "02147483647", 11), 2147483647);
36+
EXPECT_EQ(gdv_fn_castINT_utf8(ctx_ptr, "-2147483648", 11), -2147483648LL);
37+
EXPECT_EQ(gdv_fn_castINT_utf8(ctx_ptr, "-02147483648", 12), -2147483648LL);
38+
EXPECT_EQ(gdv_fn_castINT_utf8(ctx_ptr, " 12 ", 4), 12);
39+
40+
gdv_fn_castINT_utf8(ctx_ptr, "2147483648", 10);
41+
EXPECT_THAT(ctx.get_error(),
42+
::testing::HasSubstr("Failed to cast the string 2147483648 to int32"));
43+
ctx.Reset();
44+
45+
gdv_fn_castINT_utf8(ctx_ptr, "-2147483649", 11);
46+
EXPECT_THAT(ctx.get_error(),
47+
::testing::HasSubstr("Failed to cast the string -2147483649 to int32"));
48+
ctx.Reset();
49+
50+
gdv_fn_castINT_utf8(ctx_ptr, "12.34", 5);
51+
EXPECT_THAT(ctx.get_error(),
52+
::testing::HasSubstr("Failed to cast the string 12.34 to int32"));
53+
ctx.Reset();
54+
55+
gdv_fn_castINT_utf8(ctx_ptr, "abc", 3);
56+
EXPECT_THAT(ctx.get_error(),
57+
::testing::HasSubstr("Failed to cast the string abc to int32"));
58+
ctx.Reset();
59+
60+
gdv_fn_castINT_utf8(ctx_ptr, "", 0);
61+
EXPECT_THAT(ctx.get_error(),
62+
::testing::HasSubstr("Failed to cast the string to int32"));
63+
ctx.Reset();
64+
65+
gdv_fn_castINT_utf8(ctx_ptr, "-", 1);
66+
EXPECT_THAT(ctx.get_error(),
67+
::testing::HasSubstr("Failed to cast the string - to int32"));
68+
ctx.Reset();
69+
}
70+
71+
TEST(TestGdvFnStubs, TestCastBIGINT) {
72+
gandiva::ExecutionContext ctx;
73+
74+
int64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);
75+
76+
EXPECT_EQ(gdv_fn_castBIGINT_utf8(ctx_ptr, "-45", 3), -45);
77+
EXPECT_EQ(gdv_fn_castBIGINT_utf8(ctx_ptr, "0", 1), 0);
78+
EXPECT_EQ(gdv_fn_castBIGINT_utf8(ctx_ptr, "9223372036854775807", 19),
79+
9223372036854775807LL);
80+
EXPECT_EQ(gdv_fn_castBIGINT_utf8(ctx_ptr, "09223372036854775807", 20),
81+
9223372036854775807LL);
82+
EXPECT_EQ(gdv_fn_castBIGINT_utf8(ctx_ptr, "-9223372036854775808", 20),
83+
-9223372036854775807LL - 1);
84+
EXPECT_EQ(gdv_fn_castBIGINT_utf8(ctx_ptr, "-009223372036854775808", 22),
85+
-9223372036854775807LL - 1);
86+
EXPECT_EQ(gdv_fn_castBIGINT_utf8(ctx_ptr, " 12 ", 4), 12);
87+
88+
gdv_fn_castBIGINT_utf8(ctx_ptr, "9223372036854775808", 19);
89+
EXPECT_THAT(
90+
ctx.get_error(),
91+
::testing::HasSubstr("Failed to cast the string 9223372036854775808 to int64"));
92+
ctx.Reset();
93+
94+
gdv_fn_castBIGINT_utf8(ctx_ptr, "-9223372036854775809", 20);
95+
EXPECT_THAT(
96+
ctx.get_error(),
97+
::testing::HasSubstr("Failed to cast the string -9223372036854775809 to int64"));
98+
ctx.Reset();
99+
100+
gdv_fn_castBIGINT_utf8(ctx_ptr, "12.34", 5);
101+
EXPECT_THAT(ctx.get_error(),
102+
::testing::HasSubstr("Failed to cast the string 12.34 to int64"));
103+
ctx.Reset();
104+
105+
gdv_fn_castBIGINT_utf8(ctx_ptr, "abc", 3);
106+
EXPECT_THAT(ctx.get_error(),
107+
::testing::HasSubstr("Failed to cast the string abc to int64"));
108+
ctx.Reset();
109+
110+
gdv_fn_castBIGINT_utf8(ctx_ptr, "", 0);
111+
EXPECT_THAT(ctx.get_error(),
112+
::testing::HasSubstr("Failed to cast the string to int64"));
113+
ctx.Reset();
114+
115+
gdv_fn_castBIGINT_utf8(ctx_ptr, "-", 1);
116+
EXPECT_THAT(ctx.get_error(),
117+
::testing::HasSubstr("Failed to cast the string - to int64"));
118+
ctx.Reset();
119+
}
120+
121+
TEST(TestGdvFnStubs, TestCastFloat4) {
122+
gandiva::ExecutionContext ctx;
123+
124+
int64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);
125+
126+
EXPECT_EQ(gdv_fn_castFLOAT4_utf8(ctx_ptr, "-45.34", 6), -45.34f);
127+
EXPECT_EQ(gdv_fn_castFLOAT4_utf8(ctx_ptr, "0", 1), 0.0f);
128+
EXPECT_EQ(gdv_fn_castFLOAT4_utf8(ctx_ptr, "5", 1), 5.0f);
129+
EXPECT_EQ(gdv_fn_castFLOAT4_utf8(ctx_ptr, " 3.4 ", 5), 3.4f);
130+
131+
gdv_fn_castFLOAT4_utf8(ctx_ptr, "", 0);
132+
EXPECT_THAT(ctx.get_error(),
133+
::testing::HasSubstr("Failed to cast the string to float"));
134+
ctx.Reset();
135+
136+
gdv_fn_castFLOAT4_utf8(ctx_ptr, "e", 1);
137+
EXPECT_THAT(ctx.get_error(),
138+
::testing::HasSubstr("Failed to cast the string e to float"));
139+
ctx.Reset();
140+
}
141+
142+
TEST(TestGdvFnStubs, TestCastFloat8) {
143+
gandiva::ExecutionContext ctx;
144+
145+
int64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);
146+
147+
EXPECT_EQ(gdv_fn_castFLOAT8_utf8(ctx_ptr, "-45.34", 6), -45.34);
148+
EXPECT_EQ(gdv_fn_castFLOAT8_utf8(ctx_ptr, "0", 1), 0.0);
149+
EXPECT_EQ(gdv_fn_castFLOAT8_utf8(ctx_ptr, "5", 1), 5.0);
150+
EXPECT_EQ(gdv_fn_castFLOAT8_utf8(ctx_ptr, " 3.4 ", 5), 3.4);
151+
152+
gdv_fn_castFLOAT8_utf8(ctx_ptr, "", 0);
153+
EXPECT_THAT(ctx.get_error(),
154+
::testing::HasSubstr("Failed to cast the string to double"));
155+
ctx.Reset();
156+
157+
gdv_fn_castFLOAT8_utf8(ctx_ptr, "e", 1);
158+
EXPECT_THAT(ctx.get_error(),
159+
::testing::HasSubstr("Failed to cast the string e to double"));
160+
ctx.Reset();
161+
}
162+
163+
} // namespace gandiva

cpp/src/gandiva/precompiled/string_ops.cc

Lines changed: 1 addition & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ extern "C" {
2323
#include <stdio.h>
2424
#include <stdlib.h>
2525
#include <string.h>
26+
2627
#include "./types.h"
2728

2829
FORCE_INLINE
@@ -1439,27 +1440,4 @@ const char* binary_string(gdv_int64 context, const char* text, gdv_int32 text_le
14391440
return ret;
14401441
}
14411442

1442-
#define CAST_NUMERIC_FROM_STRING(OUT_TYPE, ARROW_TYPE, TYPE_NAME) \
1443-
FORCE_INLINE \
1444-
gdv_##OUT_TYPE cast##TYPE_NAME##_utf8(int64_t context, const char* data, \
1445-
int32_t len) { \
1446-
gdv_##OUT_TYPE val = 0; \
1447-
int32_t trimmed_len; \
1448-
data = btrim_utf8(context, data, len, &trimmed_len); \
1449-
if (!arrow::internal::ParseValue<ARROW_TYPE>(data, trimmed_len, &val)) { \
1450-
std::string err = "Failed to cast the string " + std::string(data, trimmed_len) + \
1451-
" to " #OUT_TYPE; \
1452-
gdv_fn_context_set_error_msg(context, err.c_str()); \
1453-
} \
1454-
return val; \
1455-
}
1456-
1457-
CAST_NUMERIC_FROM_STRING(int32, arrow::Int32Type, INT)
1458-
CAST_NUMERIC_FROM_STRING(int64, arrow::Int64Type, BIGINT)
1459-
CAST_NUMERIC_FROM_STRING(float32, arrow::FloatType, FLOAT4)
1460-
CAST_NUMERIC_FROM_STRING(float64, arrow::DoubleType, FLOAT8)
1461-
1462-
#undef CAST_INT_FROM_STRING
1463-
#undef CAST_FLOAT_FROM_STRING
1464-
14651443
} // extern "C"

0 commit comments

Comments
 (0)