@@ -46,7 +46,7 @@ TEST(TestShaHashUtils, TestSha1Numeric) {
4646 for (auto value : values_to_be_hashed) {
4747 int out_length;
4848 const char * sha_1 =
49- gandiva::gdv_hash_using_sha1 (ctx_ptr, &value, sizeof (value), &out_length);
49+ gandiva::gdv_sha1_hash (ctx_ptr, &value, sizeof (value), &out_length);
5050 std::string sha1_as_str (sha_1, out_length);
5151 EXPECT_EQ (sha1_as_str.size (), sha1_size);
5252
@@ -81,7 +81,7 @@ TEST(TestShaHashUtils, TestSha256Numeric) {
8181 for (auto value : values_to_be_hashed) {
8282 int out_length;
8383 const char * sha_256 =
84- gandiva::gdv_hash_using_sha256 (ctx_ptr, &value, sizeof (value), &out_length);
84+ gandiva::gdv_sha256_hash (ctx_ptr, &value, sizeof (value), &out_length);
8585 std::string sha256_as_str (sha_256, out_length);
8686 EXPECT_EQ (sha256_as_str.size (), sha256_size);
8787
@@ -91,6 +91,40 @@ TEST(TestShaHashUtils, TestSha256Numeric) {
9191 }
9292}
9393
94+ TEST (TestShaHashUtils, TestMD5Numeric) {  // MD5 over eight 64-bit numeric values: every hash must have the expected length and be distinct
95+ gandiva::ExecutionContext ctx;
96+
97+ auto ctx_ptr = reinterpret_cast <int64_t >(&ctx);  // the context's address is handed to the hash function as an int64_t
98+
99+ std::vector<uint64_t > values_to_be_hashed;
100+
101+ // Build the list of values whose MD5 hashes will be computed; each double is first passed through gdv_double_to_long
102+ values_to_be_hashed.push_back (gandiva::gdv_double_to_long (0.0 ));
103+ values_to_be_hashed.push_back (gandiva::gdv_double_to_long (0.1 ));
104+ values_to_be_hashed.push_back (gandiva::gdv_double_to_long (0.2 ));
105+ values_to_be_hashed.push_back (gandiva::gdv_double_to_long (-0.10000001 ));
106+ values_to_be_hashed.push_back (gandiva::gdv_double_to_long (-0.0000001 ));
107+ values_to_be_hashed.push_back (gandiva::gdv_double_to_long (1.000000 ));
108+ values_to_be_hashed.push_back (gandiva::gdv_double_to_long (-0.0000002 ));
109+ values_to_be_hashed.push_back (gandiva::gdv_double_to_long (0.999999 ));
110+
111+ // Holds every hash seen so far so the loop below can check that each value produces a different hash
112+ std::unordered_set<std::string> md5_values;
113+
114+ int md5_size = 32 ;  // length each returned hash string is expected to have
115+
116+ for (auto value : values_to_be_hashed) {
117+ int out_length;  // passed by address to gdv_md5_hash, then used as the length of the result string
118+ const char * md5 = gandiva::gdv_md5_hash (ctx_ptr, &value, sizeof (value), &out_length);  // input is the address and byte size of the 64-bit value
119+ std::string md5_as_str (md5, out_length);
120+ EXPECT_EQ (md5_as_str.size (), md5_size);
121+
122+ // The hash must not already be present in the set of previous results
123+ EXPECT_EQ (md5_values.find (md5_as_str), md5_values.end ());
124+ md5_values.insert (md5_as_str);
125+ }
126+ }
127+
94128TEST (TestShaHashUtils, TestSha1Varlen) {
95129 gandiva::ExecutionContext ctx;
96130
@@ -113,14 +147,14 @@ TEST(TestShaHashUtils, TestSha1Varlen) {
113147 const int sha1_size = 40 ;
114148 int out_length;
115149
116- const char * sha_1 = gandiva::gdv_hash_using_sha1 (ctx_ptr, first_string.c_str (),
117- first_string.size (), &out_length);
150+ const char * sha_1 = gandiva::gdv_sha1_hash (ctx_ptr, first_string.c_str (),
151+ first_string.size (), &out_length);
118152 std::string sha1_as_str (sha_1, out_length);
119153 EXPECT_EQ (sha1_as_str.size (), sha1_size);
120154 EXPECT_EQ (sha1_as_str, expected_first_result);
121155
122- const char * sha_2 = gandiva::gdv_hash_using_sha1 (ctx_ptr, second_string.c_str (),
123- second_string.size (), &out_length);
156+ const char * sha_2 = gandiva::gdv_sha1_hash (ctx_ptr, second_string.c_str (),
157+ second_string.size (), &out_length);
124158 std::string sha2_as_str (sha_2, out_length);
125159 EXPECT_EQ (sha2_as_str.size (), sha1_size);
126160 EXPECT_EQ (sha2_as_str, expected_second_result);
@@ -150,15 +184,49 @@ TEST(TestShaHashUtils, TestSha256Varlen) {
150184 const int sha256_size = 64 ;
151185 int out_length;
152186
153- const char * sha_1 = gandiva::gdv_hash_using_sha256 (ctx_ptr, first_string.c_str (),
154- first_string.size (), &out_length);
187+ const char * sha_1 = gandiva::gdv_sha256_hash (ctx_ptr, first_string.c_str (),
188+ first_string.size (), &out_length);
155189 std::string sha1_as_str (sha_1, out_length);
156190 EXPECT_EQ (sha1_as_str.size (), sha256_size);
157191 EXPECT_EQ (sha1_as_str, expected_first_result);
158192
159- const char * sha_2 = gandiva::gdv_hash_using_sha256 (ctx_ptr, second_string.c_str (),
160- second_string.size (), &out_length);
193+ const char * sha_2 = gandiva::gdv_sha256_hash (ctx_ptr, second_string.c_str (),
194+ second_string.size (), &out_length);
161195 std::string sha2_as_str (sha_2, out_length);
162196 EXPECT_EQ (sha2_as_str.size (), sha256_size);
163197 EXPECT_EQ (sha2_as_str, expected_second_result);
164198}
199+
200+ TEST (TestShaHashUtils, TestMD5Varlen) {  // MD5 over two non-ASCII strings, compared against precomputed expected hashes
201+ gandiva::ExecutionContext ctx;
202+
203+ auto ctx_ptr = reinterpret_cast <int64_t >(&ctx);  // the context's address is handed to the hash function as an int64_t
204+
205+ std::string first_string =
206+ " ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃnY [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ]" ;
207+
208+ std::string second_string =
209+ " ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeınY [ˈʏpsilɔn], "
210+ " Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ" ;
211+
212+ // The expected hashes were obtained by running the following shell command on each string:
213+ // echo -n <output-string> | openssl dgst md5
214+ std::string expected_first_result = " a633460644425b44e0e023d6980849cc" ;  // NOTE(review): as shown, this literal is 33 characters (leading space) while md5_size is 32 — confirm the literals against the original file
215+ std::string expected_second_result = " 407983529dba21e95d95951ccffd30c3" ;
216+
217+ // Compute both hashes and compare their length and content with the expected values
218+ const int md5_size = 32 ;
219+ int out_length;  // passed by address to gdv_md5_hash and reused for both calls
220+
221+ const char * md5_1 = gandiva::gdv_md5_hash (ctx_ptr, first_string.c_str (),
222+ first_string.size (), &out_length);
223+ std::string md5_as_str (md5_1, out_length);
224+ EXPECT_EQ (md5_as_str.size (), md5_size);
225+ EXPECT_EQ (md5_as_str, expected_first_result);
226+
227+ const char * md5_2 = gandiva::gdv_md5_hash (ctx_ptr, second_string.c_str (),
228+ second_string.size (), &out_length);
229+ std::string md5_2_as_str (md5_2, out_length);
230+ EXPECT_EQ (md5_2_as_str.size (), md5_size);
231+ EXPECT_EQ (md5_2_as_str, expected_second_result);
232+ }
0 commit comments