Skip to content

Commit 3db4b9c

Browse files
committed
Review
1 parent 4b69655 commit 3db4b9c

16 files changed

Lines changed: 276 additions & 100 deletions

File tree

cpp/src/arrow/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ set(ARROW_SRCS
136136
util/string_builder.cc
137137
util/task_group.cc
138138
util/thread_pool.cc
139+
util/time.cc
139140
util/trie.cc
140141
util/uri.cc
141142
util/utf8.cc

cpp/src/arrow/compute/kernels/cast.cc

Lines changed: 16 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include "arrow/util/logging.h"
3838
#include "arrow/util/macros.h"
3939
#include "arrow/util/parsing.h" // IWYU pragma: keep
40+
#include "arrow/util/time.h"
4041
#include "arrow/util/utf8.h"
4142
#include "arrow/visitor_inline.h"
4243

@@ -407,16 +408,17 @@ struct CastFunctor<
407408
// From one timestamp to another
408409

409410
template <typename in_type, typename out_type>
410-
void ShiftTime(FunctionContext* ctx, const CastOptions& options, const bool is_multiply,
411-
const int64_t factor, const ArrayData& input, ArrayData* output) {
411+
void ShiftTime(FunctionContext* ctx, const CastOptions& options,
412+
const util::DivideOrMultiply factor_op, const int64_t factor,
413+
const ArrayData& input, ArrayData* output) {
412414
const in_type* in_data = input.GetValues<in_type>(1);
413415
auto out_data = output->GetMutableValues<out_type>(1);
414416

415417
if (factor == 1) {
416418
for (int64_t i = 0; i < input.length; i++) {
417419
out_data[i] = static_cast<out_type>(in_data[i]);
418420
}
419-
} else if (is_multiply) {
421+
} else if (factor_op == util::MULTIPLY) {
420422
if (options.allow_time_overflow) {
421423
for (int64_t i = 0; i < input.length; i++) {
422424
out_data[i] = static_cast<out_type>(in_data[i] * factor);
@@ -488,18 +490,6 @@ void ShiftTime(FunctionContext* ctx, const CastOptions& options, const bool is_m
488490
}
489491
}
490492

491-
namespace {
492-
493-
// {is_multiply, factor}
494-
const std::pair<bool, int64_t> kTimeConversionTable[4][4] = {
495-
{{true, 1}, {true, 1000}, {true, 1000000}, {true, 1000000000L}}, // SECOND
496-
{{false, 1000}, {true, 1}, {true, 1000}, {true, 1000000}}, // MILLI
497-
{{false, 1000000}, {false, 1000}, {true, 1}, {true, 1000}}, // MICRO
498-
{{false, 1000000000L}, {false, 1000000}, {false, 1000}, {true, 1}}, // NANO
499-
};
500-
501-
} // namespace
502-
503493
// <TimestampType, TimestampType> and <DurationType, DurationType>
504494
template <typename O, typename I>
505495
struct CastFunctor<
@@ -517,10 +507,8 @@ struct CastFunctor<
517507
return;
518508
}
519509

520-
std::pair<bool, int64_t> conversion =
521-
kTimeConversionTable[static_cast<int>(in_type.unit())]
522-
[static_cast<int>(out_type.unit())];
523-
510+
auto conversion = util::kTimestampConversionTable[static_cast<int>(in_type.unit())]
511+
[static_cast<int>(out_type.unit())];
524512
ShiftTime<int64_t, int64_t>(ctx, options, conversion.first, conversion.second, input,
525513
output);
526514
}
@@ -540,7 +528,7 @@ struct CastFunctor<Date32Type, TimestampType> {
540528
};
541529

542530
const int64_t factor = kTimestampToDateFactors[static_cast<int>(in_type.unit())];
543-
ShiftTime<int64_t, int32_t>(ctx, options, false, factor, input, output);
531+
ShiftTime<int64_t, int32_t>(ctx, options, util::DIVIDE, factor, input, output);
544532
}
545533
};
546534

@@ -550,10 +538,8 @@ struct CastFunctor<Date64Type, TimestampType> {
550538
const ArrayData& input, ArrayData* output) {
551539
const auto& in_type = checked_cast<const TimestampType&>(*input.type);
552540

553-
std::pair<bool, int64_t> conversion =
554-
kTimeConversionTable[static_cast<int>(in_type.unit())]
555-
[static_cast<int>(TimeUnit::MILLI)];
556-
541+
auto conversion = util::kTimestampConversionTable[static_cast<int>(in_type.unit())]
542+
[static_cast<int>(TimeUnit::MILLI)];
557543
ShiftTime<int64_t, int64_t>(ctx, options, conversion.first, conversion.second, input,
558544
output);
559545
if (!ctx->status().ok()) {
@@ -611,9 +597,8 @@ struct CastFunctor<O, I, enable_if_t<is_time_type<I>::value && is_time_type<O>::
611597
return;
612598
}
613599

614-
std::pair<bool, int64_t> conversion =
615-
kTimeConversionTable[static_cast<int>(in_type.unit())]
616-
[static_cast<int>(out_type.unit())];
600+
auto conversion = util::kTimestampConversionTable[static_cast<int>(in_type.unit())]
601+
[static_cast<int>(out_type.unit())];
617602

618603
ShiftTime<in_t, out_t>(ctx, options, conversion.first, conversion.second, input,
619604
output);
@@ -627,15 +612,17 @@ template <>
627612
struct CastFunctor<Date64Type, Date32Type> {
628613
void operator()(FunctionContext* ctx, const CastOptions& options,
629614
const ArrayData& input, ArrayData* output) {
630-
ShiftTime<int32_t, int64_t>(ctx, options, true, kMillisecondsInDay, input, output);
615+
ShiftTime<int32_t, int64_t>(ctx, options, util::MULTIPLY, kMillisecondsInDay, input,
616+
output);
631617
}
632618
};
633619

634620
template <>
635621
struct CastFunctor<Date32Type, Date64Type> {
636622
void operator()(FunctionContext* ctx, const CastOptions& options,
637623
const ArrayData& input, ArrayData* output) {
638-
ShiftTime<int64_t, int32_t>(ctx, options, false, kMillisecondsInDay, input, output);
624+
ShiftTime<int64_t, int32_t>(ctx, options, util::DIVIDE, kMillisecondsInDay, input,
625+
output);
639626
}
640627
};
641628

cpp/src/arrow/compute/kernels/sum.cc

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,11 @@ struct SumState {
4242
std::shared_ptr<Scalar> Finalize() const {
4343
using ScalarType = typename TypeTraits<SumType>::ScalarType;
4444

45-
auto boxed = std::make_shared<ScalarType>(this->sum);
4645
if (count == 0) {
47-
// TODO(wesm): Currently null, but fix this
48-
boxed->is_valid = false;
46+
return std::make_shared<ScalarType>();
4947
}
5048

51-
return std::move(boxed);
49+
return MakeScalar(sum);
5250
}
5351

5452
static std::shared_ptr<DataType> out_type() {

cpp/src/arrow/compute/test_util.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,7 @@ struct DatumEqual<Type, enable_if_integer<Type>> {
9797
if (lhs.kind() == Datum::SCALAR) {
9898
auto left = internal::checked_cast<const ScalarType*>(lhs.scalar().get());
9999
auto right = internal::checked_cast<const ScalarType*>(rhs.scalar().get());
100-
ASSERT_EQ(left->is_valid, right->is_valid);
101-
ASSERT_EQ(left->type->id(), right->type->id());
102-
ASSERT_EQ(left->value, right->value);
100+
ASSERT_EQ(*left, *right);
103101
}
104102
}
105103
};

cpp/src/arrow/dataset/filter.cc

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -704,7 +704,7 @@ bool ComparisonExpression::Equals(const Expression& other) const {
704704

705705
bool ScalarExpression::Equals(const Expression& other) const {
706706
return other.type() == ExpressionType::SCALAR &&
707-
value_->Equals(checked_cast<const ScalarExpression&>(other).value_);
707+
value_->Equals(*checked_cast<const ScalarExpression&>(other).value_);
708708
}
709709

710710
bool FieldExpression::Equals(const Expression& other) const {
@@ -880,9 +880,7 @@ Result<std::shared_ptr<DataType>> CastExpression::Validate(const Schema& schema)
880880
// if the operand is a scalar leaf.
881881
if (operand_->type() == ExpressionType::SCALAR) {
882882
auto scalar_expr = checked_pointer_cast<ScalarExpression>(operand_);
883-
auto result = scalar_expr->value()->CastTo(to_type);
884-
// Test if the cast is implemented.
885-
RETURN_NOT_OK(result.status());
883+
ARROW_ASSIGN_OR_RAISE(std::ignore, scalar_expr->value()->CastTo(to_type));
886884
return to_type;
887885
}
888886

@@ -1214,7 +1212,7 @@ Result<std::shared_ptr<RecordBatch>> TreeEvaluator::Filter(
12141212
selection.kind(), " of type ", *selection.type());
12151213
}
12161214

1217-
if (BooleanScalar(true).Equals(selection.scalar())) {
1215+
if (BooleanScalar(true).Equals(*selection.scalar())) {
12181216
return batch;
12191217
}
12201218

cpp/src/arrow/dataset/filter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,7 @@ bool Expression::Equals(T&& t) const {
491491
return false;
492492
}
493493
auto s = MakeScalar(std::forward<T>(t));
494-
return internal::checked_cast<const ScalarExpression&>(*this).value()->Equals(s);
494+
return internal::checked_cast<const ScalarExpression&>(*this).value()->Equals(*s);
495495
}
496496

497497
template <typename Visitor>

cpp/src/arrow/scalar.cc

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "arrow/util/formatting.h"
3131
#include "arrow/util/logging.h"
3232
#include "arrow/util/parsing.h"
33+
#include "arrow/util/time.h"
3334
#include "arrow/visitor_inline.h"
3435

3536
namespace arrow {
@@ -75,8 +76,6 @@ template <typename T, typename R = void>
7576
using enable_if_scalar_constructor_has_no_arrow_type =
7677
typename std::enable_if<!scalar_constructor_has_arrow_type<T>::value, R>::type;
7778

78-
// TODO(bkietz) This doesn't need a factory. Just rewrite all scalars to be generically
79-
// constructible (is_simple_scalar should apply to all scalars)
8079
struct MakeNullImpl {
8180
template <typename T, typename ScalarType = typename TypeTraits<T>::ScalarType>
8281
enable_if_scalar_constructor_has_arrow_type<T, Status> Visit(const T&) {
@@ -217,8 +216,7 @@ CastImpl(const TemporalScalar<From>& from, NumericScalar<To>* to) {
217216

218217
// timestamp to timestamp
219218
Status CastImpl(const TimestampScalar& from, TimestampScalar* to) {
220-
to->value = from.value;
221-
return Status::OK();
219+
return util::ConvertTimestampValue(from.type, to->type, from.value).Value(&to->value);
222220
}
223221

224222
// string to any

cpp/src/arrow/scalar.h

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "arrow/type.h"
3434
#include "arrow/type_fwd.h"
3535
#include "arrow/type_traits.h"
36+
#include "arrow/util/compare.h"
3637
#include "arrow/util/decimal.h"
3738
#include "arrow/util/logging.h"
3839
#include "arrow/util/string_view.h"
@@ -44,7 +45,7 @@ class Array;
4445

4546
/// \brief Base class for scalar values, representing a single value occupying
4647
/// an array "slot"
47-
struct ARROW_EXPORT Scalar {
48+
struct ARROW_EXPORT Scalar : public util::EqualityComparable<Scalar> {
4849
virtual ~Scalar() = default;
4950

5051
explicit Scalar(const std::shared_ptr<DataType>& type) : type(type), is_valid(false) {}
@@ -56,10 +57,6 @@ struct ARROW_EXPORT Scalar {
5657
bool is_valid = false;
5758

5859
bool Equals(const Scalar& other) const;
59-
bool Equals(const std::shared_ptr<Scalar>& other) const {
60-
if (other) return Equals(*other);
61-
return false;
62-
}
6360

6461
std::string ToString() const;
6562

@@ -101,7 +98,7 @@ struct ARROW_EXPORT PrimitiveScalar : public Scalar {
10198

10299
PrimitiveScalar() : Scalar(TypeTraits<T>::type_singleton()) {}
103100

104-
ValueType value;
101+
ValueType value{};
105102
};
106103

107104
} // namespace internal

0 commit comments

Comments
 (0)