Skip to content

Commit ec44a12

Browse files
ARROW-16738: [C++][Gandiva] Fix TO_TIMESTAMP(INTEGER) function for big integer values (apache#13298)
When the to_timestamp function receives a large integer value, it returns an incorrect date due to integer overflow: - TO_TIMESTAMP(1626255099[INT32]) -> '1969-12-14 04:54:53.816' The correct result would be: - TO_TIMESTAMP(1626255099[INT32]) -> '2021-07-14 09:31:39' The error occurs because the integer is cast to timestamp (int64) only after it has been multiplied by the number of milliseconds per second, so the multiplication itself overflows. It is necessary to change the order and convert the value to int64 before multiplying by the number of milliseconds per second. Authored-by: Anthony Louis <anthony@simbioseventures.com> Signed-off-by: Pindikura Ravindra <ravindra@dremio.com>
1 parent 2885307 commit ec44a12

3 files changed

Lines changed: 104 additions & 13 deletions

File tree

cpp/src/gandiva/precompiled/time.cc

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -40,19 +40,25 @@ extern "C" {
4040
INNER(date64) \
4141
INNER(timestamp)
4242

43-
// Expand inner macro for all base numeric types.
44-
#define NUMERIC_TYPES(INNER) \
45-
INNER(int8) \
46-
INNER(int16) \
47-
INNER(int32) \
48-
INNER(int64) \
49-
INNER(uint8) \
50-
INNER(uint16) \
51-
INNER(uint32) \
52-
INNER(uint64) \
53-
INNER(float32) \
43+
#define INTEGER_NUMERIC_TYPES(INNER) \
44+
INNER(int8) \
45+
INNER(int16) \
46+
INNER(int32) \
47+
INNER(int64) \
48+
INNER(uint8) \
49+
INNER(uint16) \
50+
INNER(uint32) \
51+
INNER(uint64)
52+
53+
#define REAL_NUMERIC_TYPES(INNER) \
54+
INNER(float32) \
5455
INNER(float64)
5556

57+
// Expand inner macro for all base numeric types.
58+
#define NUMERIC_TYPES(INNER) \
59+
INTEGER_NUMERIC_TYPES(INNER) \
60+
REAL_NUMERIC_TYPES(INNER)
61+
5662
// Extract millennium
5763
#define EXTRACT_MILLENNIUM(TYPE) \
5864
FORCE_INLINE \
@@ -983,13 +989,23 @@ gdv_int64 castBIGINT_daytimeinterval(gdv_day_time_interval in) {
983989
}
984990

985991
// Convert the seconds since epoch argument to timestamp
986-
#define TO_TIMESTAMP(TYPE) \
992+
#define TO_TIMESTAMP_INTEGER(TYPE) \
993+
FORCE_INLINE \
994+
gdv_timestamp to_timestamp##_##TYPE(gdv_##TYPE seconds) { \
995+
return static_cast<gdv_timestamp>(seconds) * MILLIS_IN_SEC; \
996+
}
997+
998+
#define TO_TIMESTAMP_REAL(TYPE) \
987999
FORCE_INLINE \
9881000
gdv_timestamp to_timestamp##_##TYPE(gdv_##TYPE seconds) { \
9891001
return static_cast<gdv_timestamp>(seconds * MILLIS_IN_SEC); \
9901002
}
9911003

992-
NUMERIC_TYPES(TO_TIMESTAMP)
1004+
INTEGER_NUMERIC_TYPES(TO_TIMESTAMP_INTEGER)
1005+
REAL_NUMERIC_TYPES(TO_TIMESTAMP_REAL)
1006+
1007+
#undef TO_TIMESTAMP_INTEGER
1008+
#undef TO_TIMESTAMP_REAL
9931009

9941010
// Convert the seconds since epoch argument to time
9951011
#define TO_TIME(TYPE) \

cpp/src/gandiva/precompiled/time_test.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -962,6 +962,10 @@ TEST(TestTime, TestToTimestamp) {
962962
EXPECT_EQ(ts, to_timestamp_float32(1));
963963
EXPECT_EQ(ts, to_timestamp_float64(1));
964964

965+
ts = StringToTimestamp("2021-07-14 09:31:39");
966+
EXPECT_EQ(ts, to_timestamp_int32(1626255099));
967+
EXPECT_EQ(ts, to_timestamp_int64(1626255099));
968+
965969
ts = StringToTimestamp("1970-01-01 00:01:00");
966970
EXPECT_EQ(ts, to_timestamp_int32(60));
967971
EXPECT_EQ(ts, to_timestamp_int64(60));

cpp/src/gandiva/tests/date_time_test.cc

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -649,4 +649,75 @@ TEST_F(TestProjector, TestLastDay) {
649649
// Validate results
650650
EXPECT_ARROW_ARRAY_EQUALS(exp_output, outputs.at(0));
651651
}
652+
653+
TEST_F(TestProjector, TestToTimestampFromInt) {
654+
auto f0 = field("f0", arrow::int32());
655+
auto f1 = field("f1", arrow::int64());
656+
auto f2 = field("f2", arrow::float32());
657+
auto f3 = field("f3", arrow::float64());
658+
auto schema = arrow::schema({f0, f1, f2, f3});
659+
660+
// output fields
661+
auto output = field("out", arrow::timestamp(arrow::TimeUnit::MILLI));
662+
auto output1 = field("out1", arrow::timestamp(arrow::TimeUnit::MILLI));
663+
auto output2 = field("out1", arrow::timestamp(arrow::TimeUnit::MILLI));
664+
auto output3 = field("out1", arrow::timestamp(arrow::TimeUnit::MILLI));
665+
666+
auto totimestamp_expr = TreeExprBuilder::MakeExpression("to_timestamp", {f0}, output);
667+
auto totimestamp_expr1 = TreeExprBuilder::MakeExpression("to_timestamp", {f1}, output1);
668+
auto totimestamp_expr2 = TreeExprBuilder::MakeExpression("to_timestamp", {f2}, output2);
669+
auto totimestamp_expr3 = TreeExprBuilder::MakeExpression("to_timestamp", {f3}, output3);
670+
671+
std::shared_ptr<Projector> projector;
672+
auto status = Projector::Make(
673+
schema, {totimestamp_expr, totimestamp_expr1, totimestamp_expr2, totimestamp_expr3},
674+
TestConfiguration(), &projector);
675+
std::cout << status.message();
676+
ASSERT_TRUE(status.ok());
677+
678+
time_t epoch = Epoch();
679+
680+
int num_records = 3;
681+
auto validity = {true, true, false};
682+
std::vector<int32_t> f0_data = {0, 1626255099, 0};
683+
std::vector<int64_t> f1_data = {0, 1626255099, 0};
684+
std::vector<float> f2_data = {0, 3601.411f, 0};
685+
std::vector<double> f3_data = {0, 3601.411, 0};
686+
687+
auto array0 = MakeArrowArrayInt32(f0_data, validity);
688+
auto array1 = MakeArrowArrayInt64(f1_data, validity);
689+
auto array2 = MakeArrowArrayFloat32(f2_data, validity);
690+
auto array3 = MakeArrowArrayFloat64(f3_data, validity);
691+
692+
std::vector<int64_t> f0_1_output_data = {MillisSince(epoch, 1970, 1, 1, 0, 0, 0, 0),
693+
MillisSince(epoch, 2021, 7, 14, 9, 31, 39, 0),
694+
0};
695+
696+
std::vector<int64_t> f2_3_output_data = {MillisSince(epoch, 1970, 1, 1, 0, 0, 0, 0),
697+
MillisSince(epoch, 1970, 1, 1, 1, 0, 1, 411),
698+
0};
699+
700+
// expected output
701+
auto exp_output = MakeArrowTypeArray<arrow::TimestampType, int64_t>(
702+
timestamp(arrow::TimeUnit::MILLI), f0_1_output_data, validity);
703+
704+
// expected output
705+
auto exp_output1 = MakeArrowTypeArray<arrow::TimestampType, int64_t>(
706+
timestamp(arrow::TimeUnit::MILLI), f2_3_output_data, validity);
707+
708+
// prepare input record batch
709+
auto in_batch =
710+
arrow::RecordBatch::Make(schema, num_records, {array0, array1, array2, array3});
711+
712+
// Evaluate expression
713+
arrow::ArrayVector outputs;
714+
status = projector->Evaluate(*in_batch, pool_, &outputs);
715+
EXPECT_TRUE(status.ok());
716+
717+
// Validate results
718+
EXPECT_ARROW_ARRAY_EQUALS(exp_output, outputs.at(0));
719+
EXPECT_ARROW_ARRAY_EQUALS(exp_output, outputs.at(1));
720+
EXPECT_ARROW_ARRAY_EQUALS(exp_output1, outputs.at(2));
721+
EXPECT_ARROW_ARRAY_EQUALS(exp_output1, outputs.at(3));
722+
}
652723
} // namespace gandiva

0 commit comments

Comments
 (0)