Skip to content

Commit efeecfb

Browse files
pprudhvi authored and Pindikura Ravindra committed
ARROW-6137: [C++][Gandiva] Change output format of castVARCHAR(timestamp) in Gandiva
Format timestamp to yyyy-MM-dd hh:mm:ss.sss

Closes apache#5014 from pprudhvi/timestamp-to-string and squashes the following commits:

3cfad34 <Prudhvi Porandla> do not hardcode ts length
f1867e8 <Prudhvi Porandla> add java unittest
93c6bf4 <Prudhvi Porandla> use iomanip to format timestamp
35e7431 <Prudhvi Porandla> uset inttypes.h
c3ea538 <Prudhvi Porandla> cast to long long int
1e389e2 <Prudhvi Porandla> use snprintf, return const char*
73c39f4 <Prudhvi Porandla> error message if out_len is negative
8389473 <Prudhvi Porandla> return empty string if out_len is zero
6375622 <Prudhvi Porandla> refactor
124b6dc <Prudhvi Porandla> correct castVarchar(timestamp) method

Authored-by: Prudhvi Porandla <prudhvi.porandla@icloud.com>
Signed-off-by: Pindikura Ravindra <ravindra@dremio.com>
1 parent 0d1c7ec commit efeecfb

4 files changed

Lines changed: 131 additions & 6 deletions

File tree

cpp/src/gandiva/precompiled/time.cc

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
#include <iomanip>
19+
#include <sstream>
1820
#include "./epoch_time_point.h"
1921

2022
extern "C" {
@@ -689,19 +691,42 @@ timestamp castTIMESTAMP_utf8(int64_t context, const char* input, int32 length) {
689691

690692
timestamp castTIMESTAMP_date64(date64 date_in_millis) { return date_in_millis; }
691693

692-
char* castVARCHAR_timestamp_int64(int64 context, timestamp in, int64 length,
693-
int32* out_len) {
694-
std::string timestamp_str = std::to_string(in);
695-
*out_len = static_cast<int32>(length);
694+
const char* castVARCHAR_timestamp_int64(int64 context, timestamp in, int64 length,
695+
int32* out_len) {
696+
int64 year = extractYear_timestamp(in);
697+
int64 month = extractMonth_timestamp(in);
698+
int64 day = extractDay_timestamp(in);
699+
int64 hour = extractHour_timestamp(in);
700+
int64 minute = extractMinute_timestamp(in);
701+
int64 second = extractSecond_timestamp(in);
702+
int64 millis = in % MILLIS_IN_SEC;
703+
704+
// format to yyyy-MM-dd hh:mm:ss.sss
705+
std::stringstream s;
706+
s << std::setfill('0') << std::setw(4) << year << "-" << std::setw(2) << month << "-"
707+
<< std::setw(2) << day << " " << std::setw(2) << hour << ":" << std::setw(2) << minute
708+
<< ":" << std::setw(2) << second << "." << std::setw(3) << millis;
709+
std::string timestamp_str = s.str();
696710
int32 timestamp_str_len = static_cast<int32>(timestamp_str.length());
697-
if (length > timestamp_str_len) {
711+
712+
*out_len = static_cast<int32>(length);
713+
if (*out_len > timestamp_str_len) {
698714
*out_len = timestamp_str_len;
699715
}
716+
717+
if (*out_len <= 0) {
718+
if (*out_len < 0) {
719+
gdv_fn_context_set_error_msg(context, "Length of output string cannot be negative");
720+
}
721+
*out_len = 0;
722+
return "";
723+
}
724+
700725
char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
701726
if (ret == nullptr) {
702727
gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
703728
*out_len = 0;
704-
return nullptr;
729+
return "";
705730
}
706731
memcpy(ret, timestamp_str.data(), *out_len);
707732
return ret;

cpp/src/gandiva/precompiled/time_test.cc

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -636,4 +636,23 @@ TEST(TestTime, TestMonthsBetween) {
636636
}
637637
}
638638

639+
// Checks castVARCHAR_timestamp_int64: the timestamp is rendered as
// "yyyy-MM-dd hh:mm:ss.sss" and cut to the requested length when that length is
// shorter than the full rendering; a length of zero gives an empty string.
TEST(TestTime, castVarcharTimestamp) {
  ExecutionContext context;
  int64_t context_ptr = reinterpret_cast<int64_t>(&context);

  // Runs the cast and packages the returned pointer/length pair as a std::string.
  auto cast_to_string = [context_ptr](timestamp value, int64 max_len) {
    int32 written;
    const char* text = castVARCHAR_timestamp_int64(context_ptr, value, max_len, &written);
    return std::string(text, written);
  };

  timestamp ts = StringToTimestamp("2000-05-01 10:20:34");

  // Requested length exceeds the rendering: the whole string comes back.
  EXPECT_EQ(cast_to_string(ts, 30L), "2000-05-01 10:20:34.000");

  // Requested length shorter than the rendering: output is truncated.
  EXPECT_EQ(cast_to_string(ts, 19L), "2000-05-01 10:20:34");

  // Zero length: empty output.
  EXPECT_EQ(cast_to_string(ts, 0L), "");

  // Short year and single-digit time fields are zero-padded.
  ts = StringToTimestamp("2-05-01 0:0:4");
  EXPECT_EQ(cast_to_string(ts, 24L), "0002-05-01 00:00:04.000");
}
657+
639658
} // namespace gandiva

cpp/src/gandiva/precompiled/types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ date64 castDATE_utf8(int64_t execution_context, const char* input, int32 length)
165165

166166
timestamp castTIMESTAMP_utf8(int64_t execution_context, const char* input, int32 length);
167167
timestamp castTIMESTAMP_date64(date64);
168+
// Renders a timestamp as "yyyy-MM-dd hh:mm:ss.sss". Arguments, in order: execution
// context, the timestamp, the maximum output length, and an out-pointer that
// receives the number of bytes produced. The output is cut to the requested length
// when that is shorter than the full rendering. A zero or negative length yields ""
// with the output length set to 0; a negative length also sets an error message on
// the context.
const char* castVARCHAR_timestamp_int64(int64_t, timestamp, int64, int32*);
168169

169170
int64 truncate_int64_int32(int64 in, int32 out_scale);
170171

java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1388,4 +1388,84 @@ public void testUnknownFunction() {
13881388

13891389
assertTrue(caughtException);
13901390
}
1391+
1392+
@Test
1393+
public void testCastTimestampToString() throws Exception {
1394+
ArrowType timeStamp = new ArrowType.Timestamp(TimeUnit.MILLISECOND, "TZ");
1395+
1396+
Field tsField = Field.nullable("timestamp", timeStamp);
1397+
Field lenField = Field.nullable("outLength", int64);
1398+
1399+
TreeNode tsNode = TreeBuilder.makeField(tsField);
1400+
TreeNode lenNode = TreeBuilder.makeField(lenField);
1401+
1402+
TreeNode tsToString = TreeBuilder.makeFunction("castVARCHAR", Lists.newArrayList(tsNode, lenNode),
1403+
new ArrowType.Utf8());
1404+
1405+
Field resultField = Field.nullable("result", new ArrowType.Utf8());
1406+
List<ExpressionTree> exprs =
1407+
Lists.newArrayList(
1408+
TreeBuilder.makeExpression(tsToString, resultField));
1409+
1410+
Schema schema = new Schema(Lists.newArrayList(tsField, lenField));
1411+
Projector eval = Projector.make(schema, exprs);
1412+
1413+
int numRows = 5;
1414+
byte[] validity = new byte[] {(byte) 255};
1415+
String[] values =
1416+
new String[] {
1417+
"0007-01-01T01:00:00Z",
1418+
"2007-03-05T03:40:00Z",
1419+
"2008-05-31T13:55:00Z",
1420+
"2000-06-30T23:20:00Z",
1421+
"2000-07-10T20:30:00Z",
1422+
};
1423+
long[] lenValues =
1424+
new long[] {
1425+
23L, 24L, 22L, 0L, 4L
1426+
};
1427+
1428+
String[] expValues =
1429+
new String[] {
1430+
"0007-01-01 01:00:00.000",
1431+
"2007-03-05 03:40:00.000",
1432+
"2008-05-31 13:55:00.00",
1433+
"",
1434+
"2000",
1435+
};
1436+
1437+
ArrowBuf bufValidity = buf(validity);
1438+
ArrowBuf millisData = stringToMillis(values);
1439+
ArrowBuf lenValidity = buf(validity);
1440+
ArrowBuf lenData = longBuf(lenValues);
1441+
1442+
ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0);
1443+
ArrowRecordBatch batch =
1444+
new ArrowRecordBatch(
1445+
numRows,
1446+
Lists.newArrayList(fieldNode, fieldNode),
1447+
Lists.newArrayList(bufValidity, millisData, lenValidity, lenData));
1448+
1449+
List<ValueVector> output = new ArrayList<>();
1450+
for (int i = 0; i < exprs.size(); i++) {
1451+
VarCharVector charVector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator);
1452+
1453+
charVector.allocateNew(numRows * 23, numRows);
1454+
output.add(charVector);
1455+
}
1456+
eval.evaluate(batch, output);
1457+
eval.close();
1458+
1459+
for (ValueVector valueVector : output) {
1460+
VarCharVector charVector = (VarCharVector) valueVector;
1461+
1462+
for (int j = 0; j < numRows; j++) {
1463+
assertFalse(charVector.isNull(j));
1464+
assertEquals(expValues[j], new String(charVector.get(j)));
1465+
}
1466+
}
1467+
1468+
releaseRecordBatch(batch);
1469+
releaseValueVectors(output);
1470+
}
13911471
}

0 commit comments

Comments
 (0)