Skip to content

Commit c950c8a

Browse files
committed
Add base tests for convert replace invalid chars
1 parent 2a5fe94 commit c950c8a

1 file changed

Lines changed: 34 additions & 0 deletions

File tree

cpp/src/gandiva/precompiled/string_ops_test.cc

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -115,6 +115,40 @@ TEST(TestStringOps, TestCharLength) {
115115
ctx.Reset();
116116
}
117117

118+
// Base tests for convert_replace_invalid_fromUTF8_binary.
//
// Each case passes a byte string, a one-character replacement and an in/out
// length, then checks the returned buffer against the expected text and that
// no error was recorded on the execution context. The expectations encode
// that every invalid UTF-8 byte is swapped for the replacement character and
// that fully valid input comes back unchanged.
TEST(TestStringOps, TestConvertReplaceInvalidUtf8Char) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);

  // invalid utf8 (xf8 is invalid but x28 is not - x28 = '(')
  // The literal is split so that "-a" is not parsed as part of the \x28 escape.
  std::string a(
      "ok-\xf8\x28"
      "-a");
  gdv_int32 a_in_out_len = static_cast<gdv_int32>(a.length());
  const char* a_str = convert_replace_invalid_fromUTF8_binary(
      ctx_ptr, a.data(), a_in_out_len, "a", &a_in_out_len);
  // Size the result by the length written back through the in/out parameter,
  // not by the input length, so the comparison covers exactly the bytes the
  // function reports as output.
  EXPECT_EQ(std::string(a_str, static_cast<size_t>(a_in_out_len)), "ok-a(-a");
  EXPECT_FALSE(ctx.has_error());

  // invalid utf8 (xa0 and xa1 are invalid)
  std::string b("ok-\xa0\xa1-valid");
  gdv_int32 b_in_out_len = static_cast<gdv_int32>(b.length());
  const char* b_str = convert_replace_invalid_fromUTF8_binary(
      ctx_ptr, b.data(), b_in_out_len, "b", &b_in_out_len);
  EXPECT_EQ(std::string(b_str, static_cast<size_t>(b_in_out_len)), "ok-bb-valid");
  EXPECT_FALSE(ctx.has_error());

  // full valid utf8
  std::string c("all-valid");
  gdv_int32 c_in_out_len = static_cast<gdv_int32>(c.length());
  const char* c_str = convert_replace_invalid_fromUTF8_binary(
      ctx_ptr, c.data(), c_in_out_len, "c", &c_in_out_len);
  EXPECT_EQ(std::string(c_str, static_cast<size_t>(c_in_out_len)), "all-valid");
  EXPECT_FALSE(ctx.has_error());

  ctx.Reset();
}
151+
118152
TEST(TestStringOps, TestCastBoolToVarchar) {
119153
gandiva::ExecutionContext ctx;
120154
uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);

0 commit comments

Comments
 (0)