Skip to content

Commit b0da01d

Browse files
jpedroantunes authored and praveenbingo committed
ARROW-12936: [C++][Gandiva] Implement ASCII Hive function on Gandiva
Implement ASCII Hive function on Gandiva.
Closes apache#10438 from jpedroantunes/feature/add-ascii and squashes the following commits:
- 49df145 <João Pedro> Correct linter errors
- 5bda1e4 <João Pedro> Add projector test for ascii
- 15e1936 <João Pedro> Add base implementation for ascii
Authored-by: João Pedro <joaop@simbioseventures.com>
Signed-off-by: Praveen <praveen@dremio.com>
1 parent 0477cfc commit b0da01d

5 files changed

Lines changed: 60 additions & 0 deletions

File tree

cpp/src/gandiva/function_registry_string.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
6262
UNARY_SAFE_NULL_NEVER_BOOL_FN(isnull, {}),
6363
UNARY_SAFE_NULL_NEVER_BOOL_FN(isnotnull, {}),
6464

65+
NativeFunction("ascii", {}, DataTypeVector{utf8()}, int32(), kResultNullIfNull,
66+
"ascii_utf8"),
67+
6568
NativeFunction("upper", {}, DataTypeVector{utf8()}, utf8(), kResultNullIfNull,
6669
"gdv_fn_upper_utf8", NativeFunction::kNeedsContext),
6770

cpp/src/gandiva/precompiled/string_ops.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1214,6 +1214,15 @@ const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
12141214
return ret;
12151215
}
12161216

1217+
// Returns the numeric value of the first character of str.
1218+
GANDIVA_EXPORT
1219+
gdv_int32 ascii_utf8(const char* data, gdv_int32 data_len) {
1220+
if (data_len == 0) {
1221+
return 0;
1222+
}
1223+
return static_cast<gdv_int32>(data[0]);
1224+
}
1225+
12171226
FORCE_INLINE
12181227
const char* convert_fromUTF8_binary(gdv_int64 context, const char* bin_in, gdv_int32 len,
12191228
gdv_int32* out_len) {

cpp/src/gandiva/precompiled/string_ops_test.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,17 @@ TEST(TestStringOps, TestCompare) {
4242
EXPECT_GT(mem_compare(left, 7, right, 5), 0);
4343
}
4444

45+
TEST(TestStringOps, TestAscii) {
  // Table of (input, byte length, expected code of the first character).
  // The empty string is expected to yield 0.
  struct AsciiCase {
    const char* text;
    int length;
    int expected;
  };
  const AsciiCase kCases[] = {
      {"ABC", 3, 65}, {"abc", 3, 97}, {"Hello World!", 12, 72}, {"This is us", 10, 84},
      {"", 0, 0},     {"123", 3, 49}, {"999", 3, 57},
  };

  for (const auto& test_case : kCases) {
    EXPECT_EQ(ascii_utf8(test_case.text, test_case.length), test_case.expected);
  }
}
55+
4556
TEST(TestStringOps, TestBeginsEnds) {
4657
// starts_with
4758
EXPECT_TRUE(starts_with_utf8_utf8("hello sir", 9, "hello", 5));

cpp/src/gandiva/precompiled/types.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,8 @@ const char* btrim_utf8_utf8(gdv_int64 context, const char* basetext,
412412
gdv_int32 basetext_len, const char* trimtext,
413413
gdv_int32 trimtext_len, int32_t* out_len);
414414

415+
// Returns the numeric value of the first byte of `data`, or 0 when `data_len` is 0.
gdv_int32 ascii_utf8(const char* data, gdv_int32 data_len);
416+
415417
gdv_int32 locate_utf8_utf8(gdv_int64 context, const char* sub_str, gdv_int32 sub_str_len,
416418
const char* str, gdv_int32 str_len);
417419

cpp/src/gandiva/tests/utf8_test.cc

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -680,4 +680,39 @@ TEST_F(TestUtf8, TestCastVarChar) {
680680
EXPECT_ARROW_ARRAY_EQUALS(exp, outputs[0]);
681681
}
682682

683+
TEST_F(TestUtf8, TestAscii) {
  // Input schema: a single utf8 column named "f0".
  auto input_field = field("f0", arrow::utf8());
  auto input_schema = arrow::schema({input_field});

  // Output column: int32 result of ascii(f0).
  auto result_field = field("ascii", arrow::int32());

  // Expression under test: ascii(f0) -> result_field.
  auto ascii_expr = TreeExprBuilder::MakeExpression("ascii", {input_field}, result_field);

  // Build the projector for the expression.
  std::shared_ptr<Projector> projector;
  auto status =
      Projector::Make(input_schema, {ascii_expr}, TestConfiguration(), &projector);
  EXPECT_TRUE(status.ok()) << status.message();

  // Six non-null rows, including an empty string (expected to map to 0).
  int row_count = 6;
  auto input_values = MakeArrowArrayUtf8({"ABC", "", "abc", "Hello World", "123", "999"},
                                         {true, true, true, true, true, true});
  // Expected code of the first character of each row.
  auto expected_codes =
      MakeArrowArrayInt32({65, 0, 97, 72, 49, 57}, {true, true, true, true, true, true});

  // Wrap the input column in a record batch.
  auto input_batch = arrow::RecordBatch::Make(input_schema, row_count, {input_values});

  // Run the projection.
  arrow::ArrayVector results;
  status = projector->Evaluate(*input_batch, pool_, &results);
  EXPECT_TRUE(status.ok()) << status.message();

  // The single output column must match the expected codes.
  EXPECT_ARROW_ARRAY_EQUALS(expected_codes, results.at(0));
}
717+
683718
} // namespace gandiva

0 commit comments

Comments
 (0)