@@ -115,6 +115,40 @@ TEST(TestStringOps, TestCharLength) {
115115 ctx.Reset ();
116116}
117117
118+ TEST (TestStringOps, TestConvertReplaceInvalidUtf8Char) {
119+ gandiva::ExecutionContext ctx;
120+ uint64_t ctx_ptr = reinterpret_cast <gdv_int64>(&ctx);
121+
122+ // invalid utf8 (xf8 is invalid but x28 is not - x28 = '(')
123+ std::string a (" ok-\xf8\x28 " " -a" );
124+ gdv_int32 a_in_out_len = a.length ();
125+ const char * a_str = convert_replace_invalid_fromUTF8_binary (ctx_ptr, a.data (),
126+ a_in_out_len, " a" ,
127+ &a_in_out_len);
128+ EXPECT_EQ (std::string (a_str, a.length ()), " ok-a(-a" );
129+ EXPECT_FALSE (ctx.has_error ());
130+
131+ // invalid utf8 (xa0 and xa1 are invalid)
132+ std::string b (" ok-\xa0\xa1 -valid" );
133+ gdv_int32 b_in_out_len = b.length ();
134+ const char * b_str = convert_replace_invalid_fromUTF8_binary (ctx_ptr, b.data (),
135+ b_in_out_len, " b" ,
136+ &b_in_out_len);
137+ EXPECT_EQ (std::string (b_str, b.length ()), " ok-bb-valid" );
138+ EXPECT_FALSE (ctx.has_error ());
139+
140+ // full valid utf8
141+ std::string c (" all-valid" );
142+ gdv_int32 c_in_out_len = c.length ();
143+ const char * c_str = convert_replace_invalid_fromUTF8_binary (ctx_ptr, c.data (),
144+ c_in_out_len, " c" ,
145+ &c_in_out_len);
146+ EXPECT_EQ (std::string (c_str, c.length ()), " all-valid" );
147+ EXPECT_FALSE (ctx.has_error ());
148+
149+ ctx.Reset ();
150+ }
151+
118152TEST (TestStringOps, TestCastBoolToVarchar) {
119153 gandiva::ExecutionContext ctx;
120154 uint64_t ctx_ptr = reinterpret_cast <gdv_int64>(&ctx);
0 commit comments