Add initial support for castFLOAT4 and castFLOAT8 for varbinary

jvictorhuguenin · jvictorhuguenin · commit 7fb41bd7f191 · 2021-07-16T10:05:57.000-03:00
diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc
@@ -25,6 +25,7 @@
 #include "arrow/util/base64.h"
 #include "arrow/util/formatting.h"
 #include "arrow/util/utf8.h"
+#include "arrow/util/double_conversion.h"
 #include "arrow/util/value_parsing.h"
 #include "gandiva/engine.h"
 #include "gandiva/exported_funcs.h"
@@ -765,6 +766,29 @@ const char* gdv_fn_initcap_utf8(int64_t context, const char* data, int32_t data_
   *out_len = out_idx;
   return out;
 }
+
+// Defines gdv_fn_cast<TYPE_NAME>_varbinary, which parses the `in_len` bytes at `in`
+// with a double-conversion StringToDoubleConverter (ALLOW_HEX flag set, "inf" and
+// "NaN" as the special-value symbols) and returns the result as OUT_TYPE.
+// CONVERT names the converter method that matches OUT_TYPE (StringToFloat or
+// StringToDouble), so the FLOAT4 variant is not parsed as a double and then narrowed.
+// On a negative length, an empty buffer, or a buffer that is not consumed entirely,
+// an error message is set on the context and -1 is returned.
+#define CAST_FLOAT_VARBINARY(OUT_TYPE, TYPE_NAME, CONVERT)                           \
+  GANDIVA_EXPORT                                                                     \
+  OUT_TYPE gdv_fn_cast##TYPE_NAME##_varbinary(gdv_int64 context, const char* in,     \
+                                              int32_t in_len) {                      \
+    if (in_len < 0) {                                                                \
+      gdv_fn_context_set_error_msg(context, "Buffer length can not be negative");    \
+      return -1;                                                                     \
+    }                                                                                \
+    if (in_len == 0) {                                                               \
+      gdv_fn_context_set_error_msg(context, "Buffer can't be empty");                \
+      return -1;                                                                     \
+    }                                                                                \
+    using arrow::util::double_conversion::StringToDoubleConverter;                   \
+    const StringToDoubleConverter converter(StringToDoubleConverter::ALLOW_HEX, -1,  \
+                                            -1, "inf", "NaN");                       \
+    /* Must be non-null: the converter writes the consumed length through it. */     \
+    int processed = 0;                                                               \
+    const OUT_TYPE value = converter.CONVERT(in, in_len, &processed);                \
+    /* Treat a buffer that was not consumed entirely as invalid input. */            \
+    if (processed != in_len) {                                                       \
+      gdv_fn_context_set_error_msg(context, "Buffer is not a valid number");         \
+      return -1;                                                                     \
+    }                                                                                \
+    return value;                                                                    \
+  }
+
+CAST_FLOAT_VARBINARY(float, FLOAT4, StringToFloat)
+CAST_FLOAT_VARBINARY(double, FLOAT8, StringToDouble)
+
+#undef CAST_FLOAT_VARBINARY
 }
 
 namespace gandiva {
@@ -1020,6 +1044,22 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const {
   engine->AddGlobalMappingForFunc("gdv_fn_castFLOAT8_utf8", types->double_type(), args,
                                   reinterpret_cast<void*>(gdv_fn_castFLOAT8_utf8));
 
+  // gdv_fn_castFLOAT4_varbinary
+  args = {types->i64_type(),     // int64_t context_ptr
+          types->i8_ptr_type(),  // const char* data
+          types->i32_type()};    // int32_t len
+
+  engine->AddGlobalMappingForFunc("gdv_fn_castFLOAT4_varbinary", types->float_type(),
+                                  args,
+                                  reinterpret_cast<void*>(gdv_fn_castFLOAT4_varbinary));
+
+  // gdv_fn_castFLOAT8_varbinary (same argument list as the FLOAT4 variant)
+  args = {types->i64_type(),     // int64_t context_ptr
+          types->i8_ptr_type(),  // const char* data
+          types->i32_type()};    // int32_t len
+
+  engine->AddGlobalMappingForFunc("gdv_fn_castFLOAT8_varbinary", types->double_type(),
+                                  args,
+                                  reinterpret_cast<void*>(gdv_fn_castFLOAT8_varbinary));
+
   // gdv_fn_castVARCHAR_int32_int64
   args = {types->i64_type(),       // int64_t execution_context
           types->i32_type(),       // int32_t value
diff --git a/cpp/src/gandiva/gdv_function_stubs.h b/cpp/src/gandiva/gdv_function_stubs.h
@@ -159,14 +159,8 @@ const char* gdv_fn_initcap_utf8(int64_t context, const char* data, int32_t data_
                                 int32_t* out_len);
 
+// Parses the `in_len` bytes at `in` as a number and returns it as a float. Failures
+// (for example an empty buffer or a negative length) are reported through `context`.
 GANDIVA_EXPORT
-int32_t gdv_fn_castINT_varbinary(int64_t context, const char* data, int32_t data_len);
+float gdv_fn_castFLOAT4_varbinary(gdv_int64 context, const char* in, int32_t in_len);
 
+// Parses the `in_len` bytes at `in` as a number and returns it as a double. Failures
+// (for example an empty buffer or a negative length) are reported through `context`.
 GANDIVA_EXPORT
-int64_t gdv_fn_castBIGINT_varbinary(int64_t context, const char* data, int32_t data_len);
-
-GANDIVA_EXPORT
-float gdv_fn_castFLOAT4_varbinary(int64_t context, const char* data, int32_t data_len);
-
-GANDIVA_EXPORT
-double gdv_fn_castFLOAT8_varbinary(int64_t context, const char* data, int32_t data_len);
+double gdv_fn_castFLOAT8_varbinary(gdv_int64 context, const char* in, int32_t in_len);
 }
diff --git a/cpp/src/gandiva/gdv_function_stubs_test.cc b/cpp/src/gandiva/gdv_function_stubs_test.cc
@@ -759,4 +759,53 @@ TEST(TestGdvFnStubs, TestCastVarbinaryFloat8) {
   ctx.Reset();
 }
 
+// Exercises gdv_fn_castFLOAT4_varbinary: decimal input, 0x-prefixed hexadecimal input
+// (the form the converter's ALLOW_HEX flag recognizes), signed zero, and the error
+// messages the function sets for empty and negative-length buffers.
+TEST(TestGdvFnStubs, TestCastFLOAT4Varbinary) {
+  gandiva::ExecutionContext ctx;
+  // Keep the handle signed: the function takes the context as gdv_int64.
+  gdv_int64 ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+
+  EXPECT_FLOAT_EQ(gdv_fn_castFLOAT4_varbinary(ctx_ptr, "1.5", 3), 1.5f);
+  EXPECT_FALSE(ctx.has_error());
+  ctx.Reset();
+
+  EXPECT_FLOAT_EQ(gdv_fn_castFLOAT4_varbinary(ctx_ptr, "-2.25", 5), -2.25f);
+  EXPECT_FALSE(ctx.has_error());
+  ctx.Reset();
+
+  // 0xFFF3 == 65523, which a float represents exactly.
+  EXPECT_FLOAT_EQ(gdv_fn_castFLOAT4_varbinary(ctx_ptr, "0xFFF3", 6), 65523.0f);
+  EXPECT_FALSE(ctx.has_error());
+  ctx.Reset();
+
+  EXPECT_FLOAT_EQ(gdv_fn_castFLOAT4_varbinary(ctx_ptr, "-0xFFF3", 7), -65523.0f);
+  EXPECT_FALSE(ctx.has_error());
+  ctx.Reset();
+
+  EXPECT_FLOAT_EQ(gdv_fn_castFLOAT4_varbinary(ctx_ptr, "0", 1), 0.0f);
+  EXPECT_FALSE(ctx.has_error());
+  ctx.Reset();
+
+  // Negative zero compares equal to zero.
+  EXPECT_FLOAT_EQ(gdv_fn_castFLOAT4_varbinary(ctx_ptr, "-0", 2), 0.0f);
+  EXPECT_FALSE(ctx.has_error());
+  ctx.Reset();
+
+  gdv_fn_castFLOAT4_varbinary(ctx_ptr, "", 0);
+  EXPECT_STREQ(ctx.get_error().c_str(), "Buffer can't be empty");
+  ctx.Reset();
+
+  gdv_fn_castFLOAT4_varbinary(ctx_ptr, "1", -1);
+  EXPECT_STREQ(ctx.get_error().c_str(), "Buffer length can not be negative");
+  ctx.Reset();
+
+  // Input the converter cannot parse yields the -1 junk value it is configured with.
+  EXPECT_FLOAT_EQ(gdv_fn_castFLOAT4_varbinary(ctx_ptr, "-8FFFFFGF", 9), -1.0f);
+  ctx.Reset();
+
+  EXPECT_FLOAT_EQ(gdv_fn_castFLOAT4_varbinary(ctx_ptr, "-", 1), -1.0f);
+  ctx.Reset();
+}
+
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -1695,7 +1695,7 @@ TEST(TestStringOps, TestCastINTVarbinary) {
   EXPECT_FALSE(ctx.has_error());
   ctx.Reset();
 
-  EXPECT_EQ(castINT_varbinary(ctx_ptr, "-7FFFFFFF", 9), INT32_MIN+1);
+  EXPECT_EQ(castINT_varbinary(ctx_ptr, "-7FFFFFFF", 9), INT32_MIN + 1);
   EXPECT_FALSE(ctx.has_error());
   ctx.Reset();
 
@@ -1744,7 +1744,7 @@ TEST(TestStringOps, TestCastBIGINTVarbinary) {
   EXPECT_FALSE(ctx.has_error());
   ctx.Reset();
 
-  EXPECT_EQ(castBIGINT_varbinary(ctx_ptr, "-7FFFFFFFFFFFFFFF", 17), INT64_MIN+1);
+  EXPECT_EQ(castBIGINT_varbinary(ctx_ptr, "-7FFFFFFFFFFFFFFF", 17), INT64_MIN + 1);
   EXPECT_FALSE(ctx.has_error());
   ctx.Reset();
 
diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc
@@ -1146,22 +1146,24 @@ TEST_F(TestProjector, TestCastVarbinaryFunction) {
 
   std::shared_ptr<Projector> projector;
 
-  //  {cast_expr_float4, cast_expr_float8, cast_expr_int4, cast_expr_int8}
-  auto status = Projector::Make(
-      schema, {cast_expr_float4, cast_expr_float8, cast_expr_int4, cast_expr_int8},
-      TestConfiguration(), &projector);
+  //  {cast_expr_int4, cast_expr_int8}
+  auto status = Projector::Make(schema, {cast_expr_int4, cast_expr_int8},
+                                TestConfiguration(), &projector);
   EXPECT_TRUE(status.ok());
 
   // Create a row-batch with some sample data
   int num_records = 4;
 
   // Last validity is false and the cast functions throw error when input is empty. Should
   // not be evaluated due to addition of NativeFunction::kCanReturnErrors
-  auto array0 = MakeArrowArrayBinary({"25", "-7FFFFFFF", "7FFFFFFF", "4"}, {true, true, true, false});
+  auto array0 = MakeArrowArrayBinary({"25", "-7FFFFFFF", "7FFFFFFF", "4"},
+                                     {true, true, true, false});
   auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});
 
-  auto out_int4 = MakeArrowArrayInt32({37, INT32_MIN+1, INT32_MAX, 0}, {true, true, true, false});
-  auto out_int8 = MakeArrowArrayInt64({37, INT32_MIN+1, INT32_MAX, 0}, {true, true, true, false});
+  auto out_int4 =
+      MakeArrowArrayInt32({37, INT32_MIN + 1, INT32_MAX, 0}, {true, true, true, false});
+  auto out_int8 =
+      MakeArrowArrayInt64({37, INT32_MIN + 1, INT32_MAX, 0}, {true, true, true, false});
 
   arrow::ArrayVector outputs;