Skip to content

Commit df7babb

Browse files
authored
ARROW-17846: [C++] Use if constexpr in CSV subsystem (apache#14241)
Authored-by: Antoine Pitrou <antoine@python.org> Signed-off-by: Antoine Pitrou <antoine@python.org>
1 parent f277f2e commit df7babb

4 files changed

Lines changed: 114 additions & 110 deletions

File tree

cpp/src/arrow/csv/converter.cc

Lines changed: 13 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -96,16 +96,13 @@ Status InitializeTrie(const std::vector<std::string>& inputs, Trie* trie) {
9696

9797
// Presize a builder based on parser contents
9898
template <typename BuilderType>
99-
enable_if_t<!is_base_binary_type<typename BuilderType::TypeClass>::value, Status>
100-
PresizeBuilder(const BlockParser& parser, BuilderType* builder) {
101-
return builder->Resize(parser.num_rows());
102-
}
103-
104-
// Same, for variable-sized binary builders
105-
template <typename T>
106-
Status PresizeBuilder(const BlockParser& parser, BaseBinaryBuilder<T>* builder) {
99+
Status PresizeBuilder(const BlockParser& parser, BuilderType* builder) {
107100
RETURN_NOT_OK(builder->Resize(parser.num_rows()));
108-
return builder->ReserveData(parser.num_bytes());
101+
if constexpr (is_base_binary_type<typename BuilderType::TypeClass>::value) {
102+
return builder->ReserveData(parser.num_bytes());
103+
} else {
104+
return Status::OK();
105+
}
109106
}
110107

111108
/////////////////////////////////////////////////////////////////////////
@@ -195,19 +192,15 @@ struct BinaryValueDecoder : public ValueDecoder {
195192
// Value decoder for integers, floats and temporals
196193
//
197194

198-
template <typename T, typename Enable = void>
199-
struct StringConverterFromOptions {
200-
static arrow::internal::StringConverter<T> Make(const ConvertOptions&) {
201-
return arrow::internal::StringConverter<T>{};
202-
}
203-
};
204-
205195
template <typename T>
206-
struct StringConverterFromOptions<T, enable_if_floating_point<T>> {
207-
static arrow::internal::StringConverter<T> Make(const ConvertOptions& options) {
196+
static arrow::internal::StringConverter<T> MakeStringConverter(
197+
const ConvertOptions& options) {
198+
if constexpr (is_floating_type<T>::value) {
208199
return arrow::internal::StringConverter<T>{options.decimal_point};
200+
} else {
201+
return arrow::internal::StringConverter<T>{};
209202
}
210-
};
203+
}
211204

212205
template <typename T>
213206
struct NumericValueDecoder : public ValueDecoder {
@@ -217,7 +210,7 @@ struct NumericValueDecoder : public ValueDecoder {
217210
const ConvertOptions& options)
218211
: ValueDecoder(type, options),
219212
concrete_type_(checked_cast<const T&>(*type)),
220-
string_converter_(StringConverterFromOptions<T>::Make(options)) {}
213+
string_converter_(MakeStringConverter<T>(options)) {}
221214

222215
Status Decode(const uint8_t* data, uint32_t size, bool quoted, value_type* out) {
223216
// XXX should quoted values be allowed at all?

cpp/src/arrow/csv/writer.cc

Lines changed: 54 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -367,81 +367,67 @@ class QuotedColumnPopulator : public ColumnPopulator {
367367
std::vector<bool> row_needs_escaping_;
368368
};
369369

370-
struct PopulatorFactory {
371-
template <typename TypeClass>
372-
enable_if_t<is_base_binary_type<TypeClass>::value ||
373-
std::is_same<FixedSizeBinaryType, TypeClass>::value,
374-
Status>
375-
Visit(const TypeClass& type) {
376-
// Determine what ColumnPopulator to use based on desired CSV quoting style.
377-
switch (quoting_style) {
378-
case QuotingStyle::None:
379-
// In unquoted output we must reject values with quotes. Since these types can
380-
// produce quotes in their output rendering, we must check them and reject if
381-
// quotes appear, hence reject_values_with_quotes is set to true.
382-
populator = new UnquotedColumnPopulator(pool, end_chars, delimiter, null_string,
383-
/*reject_values_with_quotes=*/true);
384-
break;
385-
// Quoting is needed for strings/binary, or when all valid values need to be
386-
// quoted.
387-
case QuotingStyle::Needed:
388-
case QuotingStyle::AllValid:
389-
populator = new QuotedColumnPopulator(pool, end_chars, null_string);
390-
break;
370+
Result<std::unique_ptr<ColumnPopulator>> MakePopulator(
371+
const DataType& type, const std::string& end_chars, const char delimiter,
372+
const std::shared_ptr<Buffer>& null_string, QuotingStyle quoting_style,
373+
MemoryPool* pool) {
374+
auto make_populator =
375+
[&](const auto& type) -> Result<std::unique_ptr<ColumnPopulator>> {
376+
using Type = std::decay_t<decltype(type)>;
377+
378+
if constexpr (is_primitive_ctype<Type>::value || is_decimal_type<Type>::value ||
379+
is_null_type<Type>::value || is_temporal_type<Type>::value) {
380+
switch (quoting_style) {
381+
// These types are assumed not to produce any quotes, so we do not need to
382+
// check and reject for potential quotes in the casted values in case the
383+
// QuotingStyle is None.
384+
case QuotingStyle::None:
385+
[[fallthrough]];
386+
case QuotingStyle::Needed:
387+
return std::make_unique<UnquotedColumnPopulator>(
388+
pool, end_chars, delimiter, null_string,
389+
/*reject_values_with_quotes=*/false);
390+
case QuotingStyle::AllValid:
391+
return std::make_unique<QuotedColumnPopulator>(pool, end_chars, null_string);
392+
}
391393
}
392-
return Status::OK();
393-
}
394-
395-
template <typename TypeClass>
396-
enable_if_dictionary<TypeClass, Status> Visit(const TypeClass& type) {
397-
return VisitTypeInline(*type.value_type(), this);
398-
}
399394

400-
template <typename TypeClass>
401-
enable_if_t<is_nested_type<TypeClass>::value || is_extension_type<TypeClass>::value,
402-
Status>
403-
Visit(const TypeClass& type) {
404-
return Status::Invalid("Unsupported Type:", type.ToString());
405-
}
395+
if constexpr (is_base_binary_type<Type>::value ||
396+
std::is_same<Type, FixedSizeBinaryType>::value) {
397+
// Determine what ColumnPopulator to use based on desired CSV quoting style.
398+
switch (quoting_style) {
399+
case QuotingStyle::None:
400+
// In unquoted output we must reject values with quotes. Since these types
401+
// can produce quotes in their output rendering, we must check them and
402+
// reject if quotes appear, hence reject_values_with_quotes is set to true.
403+
return std::make_unique<UnquotedColumnPopulator>(
404+
pool, end_chars, delimiter, null_string,
405+
/*reject_values_with_quotes=*/true);
406+
// Quoting is needed for strings/binary, or when all valid values need to be
407+
// quoted.
408+
case QuotingStyle::Needed:
409+
[[fallthrough]];
410+
case QuotingStyle::AllValid:
411+
return std::make_unique<QuotedColumnPopulator>(pool, end_chars, null_string);
412+
}
413+
}
406414

407-
template <typename TypeClass>
408-
enable_if_t<is_primitive_ctype<TypeClass>::value || is_decimal_type<TypeClass>::value ||
409-
is_null_type<TypeClass>::value || is_temporal_type<TypeClass>::value,
410-
Status>
411-
Visit(const TypeClass& type) {
412-
// Determine what ColumnPopulator to use based on desired CSV quoting style.
413-
switch (quoting_style) {
414-
// These types are assumed not to produce any quotes, so we do not need to check
415-
// and reject for potential quotes in the casted values in case the QuotingStyle
416-
// is None.
417-
case QuotingStyle::None:
418-
case QuotingStyle::Needed:
419-
populator = new UnquotedColumnPopulator(pool, end_chars, delimiter, null_string,
420-
/*reject_values_with_quotes=*/false);
421-
break;
422-
case QuotingStyle::AllValid:
423-
populator = new QuotedColumnPopulator(pool, end_chars, null_string);
424-
break;
415+
if constexpr (std::is_same<Type, DictionaryType>::value) {
416+
return MakePopulator(*type.value_type(), end_chars, delimiter, null_string,
417+
quoting_style, pool);
425418
}
426-
return Status::OK();
427-
}
428419

429-
const std::string end_chars;
430-
const char delimiter;
431-
std::shared_ptr<Buffer> null_string;
432-
const QuotingStyle quoting_style;
433-
MemoryPool* pool;
434-
ColumnPopulator* populator;
435-
};
420+
return Status::Invalid("Unsupported Type:", type.ToString());
421+
};
422+
return VisitType(type, make_populator);
423+
}
436424

437425
Result<std::unique_ptr<ColumnPopulator>> MakePopulator(
438-
const Field& field, std::string end_chars, char delimiter,
439-
std::shared_ptr<Buffer> null_string, QuotingStyle quoting_style, MemoryPool* pool) {
440-
PopulatorFactory factory{std::move(end_chars), delimiter, std::move(null_string),
441-
quoting_style, pool, nullptr};
442-
443-
RETURN_NOT_OK(VisitTypeInline(*field.type(), &factory));
444-
return std::unique_ptr<ColumnPopulator>(factory.populator);
426+
const Field& field, const std::string& end_chars, char delimiter,
427+
const std::shared_ptr<Buffer>& null_string, QuotingStyle quoting_style,
428+
MemoryPool* pool) {
429+
return MakePopulator(*field.type(), end_chars, delimiter, null_string, quoting_style,
430+
pool);
445431
}
446432

447433
class CSVWriterImpl : public ipc::RecordBatchWriter {

cpp/src/arrow/type_traits.h

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -858,7 +858,7 @@ using enable_if_physical_floating_point =
858858
///
859859
/// \param[in] type_id the type-id to check
860860
/// \return whether type-id is an integer type one
861-
static inline bool is_integer(Type::type type_id) {
861+
constexpr bool is_integer(Type::type type_id) {
862862
switch (type_id) {
863863
case Type::UINT8:
864864
case Type::INT8:
@@ -879,7 +879,7 @@ static inline bool is_integer(Type::type type_id) {
879879
///
880880
/// \param[in] type_id the type-id to check
881881
/// \return whether type-id is a signed integer type one
882-
static inline bool is_signed_integer(Type::type type_id) {
882+
constexpr bool is_signed_integer(Type::type type_id) {
883883
switch (type_id) {
884884
case Type::INT8:
885885
case Type::INT16:
@@ -896,7 +896,7 @@ static inline bool is_signed_integer(Type::type type_id) {
896896
///
897897
/// \param[in] type_id the type-id to check
898898
/// \return whether type-id is an unsigned integer type one
899-
static inline bool is_unsigned_integer(Type::type type_id) {
899+
constexpr bool is_unsigned_integer(Type::type type_id) {
900900
switch (type_id) {
901901
case Type::UINT8:
902902
case Type::UINT16:
@@ -913,7 +913,7 @@ static inline bool is_unsigned_integer(Type::type type_id) {
913913
///
914914
/// \param[in] type_id the type-id to check
915915
/// \return whether type-id is a floating point type one
916-
static inline bool is_floating(Type::type type_id) {
916+
constexpr bool is_floating(Type::type type_id) {
917917
switch (type_id) {
918918
case Type::HALF_FLOAT:
919919
case Type::FLOAT:
@@ -931,7 +931,7 @@ static inline bool is_floating(Type::type type_id) {
931931
///
932932
/// \param[in] type_id the type-id to check
933933
/// \return whether type-id is a numeric type one
934-
static inline bool is_numeric(Type::type type_id) {
934+
constexpr bool is_numeric(Type::type type_id) {
935935
switch (type_id) {
936936
case Type::UINT8:
937937
case Type::INT8:
@@ -955,7 +955,7 @@ static inline bool is_numeric(Type::type type_id) {
955955
///
956956
/// \param[in] type_id the type-id to check
957957
/// \return whether type-id is a decimal type one
958-
static inline bool is_decimal(Type::type type_id) {
958+
constexpr bool is_decimal(Type::type type_id) {
959959
switch (type_id) {
960960
case Type::DECIMAL128:
961961
case Type::DECIMAL256:
@@ -972,7 +972,7 @@ static inline bool is_decimal(Type::type type_id) {
972972
///
973973
/// \param[in] type_id the type-id to check
974974
/// \return whether type-id is a primitive type one
975-
static inline bool is_primitive(Type::type type_id) {
975+
constexpr bool is_primitive(Type::type type_id) {
976976
switch (type_id) {
977977
case Type::BOOL:
978978
case Type::UINT8:
@@ -1009,7 +1009,7 @@ static inline bool is_primitive(Type::type type_id) {
10091009
///
10101010
/// \param[in] type_id the type-id to check
10111011
/// \return whether type-id is a base-binary-like type one
1012-
static inline bool is_base_binary_like(Type::type type_id) {
1012+
constexpr bool is_base_binary_like(Type::type type_id) {
10131013
switch (type_id) {
10141014
case Type::BINARY:
10151015
case Type::LARGE_BINARY:
@@ -1026,7 +1026,7 @@ static inline bool is_base_binary_like(Type::type type_id) {
10261026
///
10271027
/// \param[in] type_id the type-id to check
10281028
/// \return whether type-id is a binary-like type one
1029-
static inline bool is_binary_like(Type::type type_id) {
1029+
constexpr bool is_binary_like(Type::type type_id) {
10301030
switch (type_id) {
10311031
case Type::BINARY:
10321032
case Type::STRING:
@@ -1041,7 +1041,7 @@ static inline bool is_binary_like(Type::type type_id) {
10411041
///
10421042
/// \param[in] type_id the type-id to check
10431043
/// \return whether type-id is a large-binary-like type one
1044-
static inline bool is_large_binary_like(Type::type type_id) {
1044+
constexpr bool is_large_binary_like(Type::type type_id) {
10451045
switch (type_id) {
10461046
case Type::LARGE_BINARY:
10471047
case Type::LARGE_STRING:
@@ -1056,7 +1056,7 @@ static inline bool is_large_binary_like(Type::type type_id) {
10561056
///
10571057
/// \param[in] type_id the type-id to check
10581058
/// \return whether type-id is a binary type one
1059-
static inline bool is_binary(Type::type type_id) {
1059+
constexpr bool is_binary(Type::type type_id) {
10601060
switch (type_id) {
10611061
case Type::BINARY:
10621062
case Type::LARGE_BINARY:
@@ -1071,7 +1071,7 @@ static inline bool is_binary(Type::type type_id) {
10711071
///
10721072
/// \param[in] type_id the type-id to check
10731073
/// \return whether type-id is a string type one
1074-
static inline bool is_string(Type::type type_id) {
1074+
constexpr bool is_string(Type::type type_id) {
10751075
switch (type_id) {
10761076
case Type::STRING:
10771077
case Type::LARGE_STRING:
@@ -1086,7 +1086,7 @@ static inline bool is_string(Type::type type_id) {
10861086
///
10871087
/// \param[in] type_id the type-id to check
10881088
/// \return whether type-id is a temporal type one
1089-
static inline bool is_temporal(Type::type type_id) {
1089+
constexpr bool is_temporal(Type::type type_id) {
10901090
switch (type_id) {
10911091
case Type::DATE32:
10921092
case Type::DATE64:
@@ -1104,7 +1104,7 @@ static inline bool is_temporal(Type::type type_id) {
11041104
///
11051105
/// \param[in] type_id the type-id to check
11061106
/// \return whether type-id is an interval type one
1107-
static inline bool is_interval(Type::type type_id) {
1107+
constexpr bool is_interval(Type::type type_id) {
11081108
switch (type_id) {
11091109
case Type::INTERVAL_MONTHS:
11101110
case Type::INTERVAL_DAY_TIME:
@@ -1120,16 +1120,14 @@ static inline bool is_interval(Type::type type_id) {
11201120
///
11211121
/// \param[in] type_id the type-id to check
11221122
/// \return whether type-id is a dictionary type one
1123-
static inline bool is_dictionary(Type::type type_id) {
1124-
return type_id == Type::DICTIONARY;
1125-
}
1123+
constexpr bool is_dictionary(Type::type type_id) { return type_id == Type::DICTIONARY; }
11261124

11271125
/// \brief Check for a fixed-size-binary type
11281126
///
11291127
/// This predicate also matches decimals.
11301128
/// \param[in] type_id the type-id to check
11311129
/// \return whether type-id is a fixed-size-binary type one
1132-
static inline bool is_fixed_size_binary(Type::type type_id) {
1130+
constexpr bool is_fixed_size_binary(Type::type type_id) {
11331131
switch (type_id) {
11341132
case Type::DECIMAL128:
11351133
case Type::DECIMAL256:
@@ -1145,15 +1143,15 @@ static inline bool is_fixed_size_binary(Type::type type_id) {
11451143
///
11461144
/// \param[in] type_id the type-id to check
11471145
/// \return whether type-id is a fixed-width type one
1148-
static inline bool is_fixed_width(Type::type type_id) {
1146+
constexpr bool is_fixed_width(Type::type type_id) {
11491147
return is_primitive(type_id) || is_dictionary(type_id) || is_fixed_size_binary(type_id);
11501148
}
11511149

11521150
/// \brief Check for a list-like type
11531151
///
11541152
/// \param[in] type_id the type-id to check
11551153
/// \return whether type-id is a list-like type one
1156-
static inline bool is_list_like(Type::type type_id) {
1154+
constexpr bool is_list_like(Type::type type_id) {
11571155
switch (type_id) {
11581156
case Type::LIST:
11591157
case Type::LARGE_LIST:
@@ -1170,7 +1168,7 @@ static inline bool is_list_like(Type::type type_id) {
11701168
///
11711169
/// \param[in] type_id the type-id to check
11721170
/// \return whether type-id is a nested type one
1173-
static inline bool is_nested(Type::type type_id) {
1171+
constexpr bool is_nested(Type::type type_id) {
11741172
switch (type_id) {
11751173
case Type::LIST:
11761174
case Type::LARGE_LIST:
@@ -1190,7 +1188,7 @@ static inline bool is_nested(Type::type type_id) {
11901188
///
11911189
/// \param[in] type_id the type-id to check
11921190
/// \return whether type-id is a union type one
1193-
static inline bool is_union(Type::type type_id) {
1191+
constexpr bool is_union(Type::type type_id) {
11941192
switch (type_id) {
11951193
case Type::SPARSE_UNION:
11961194
case Type::DENSE_UNION:

0 commit comments

Comments
 (0)