arrow/cpp/src/arrow/util/decimal.cc at master · OpenSourceJavaProject/arrow

907 lines (793 loc) · 33.1 KB
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//   http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
#include <algorithm>
#include <array>
#include <climits>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <limits>
#include <ostream>
#include <sstream>
#include <string>
#include "arrow/status.h"
#include "arrow/util/decimal.h"
#include "arrow/util/endian.h"
#include "arrow/util/formatting.h"
#include "arrow/util/int128_internal.h"
#include "arrow/util/int_util_overflow.h"
#include "arrow/util/logging.h"
#include "arrow/util/macros.h"
#include "arrow/util/value_parsing.h"
namespace arrow {
using internal::SafeLeftShift;
using internal::SafeSignedAdd;
using internal::uint128_t;
Decimal128::Decimal128(const std::string& str) : Decimal128() {
  *this = Decimal128::FromString(str).ValueOrDie();
static constexpr auto kInt64DecimalDigits =
    static_cast<size_t>(std::numeric_limits<int64_t>::digits10);
static constexpr uint64_t kUInt64PowersOfTen[kInt64DecimalDigits + 1] = {
    // clang-format off
    100ULL,
    1000ULL,
    10000ULL,
    100000ULL,
    1000000ULL,
    10000000ULL,
    100000000ULL,
    1000000000ULL,
    10000000000ULL,
    // clang-format on
static constexpr float kFloatPowersOfTen[2 * 38 + 1] = {
    1e-38f, 1e-37f, 1e-36f, 1e-35f, 1e-34f, 1e-33f, 1e-32f, 1e-31f, 1e-30f, 1e-29f,
    1e-28f, 1e-27f, 1e-26f, 1e-25f, 1e-24f, 1e-23f, 1e-22f, 1e-21f, 1e-20f, 1e-19f,
    1e-18f, 1e-17f, 1e-16f, 1e-15f, 1e-14f, 1e-13f, 1e-12f, 1e-11f, 1e-10f, 1e-9f,
    1e-8f,  1e-7f,  1e-6f,  1e-5f,  1e-4f,  1e-3f,  1e-2f,  1e-1f,  1e0f,   1e1f,
    1e2f,   1e3f,   1e4f,   1e5f,   1e6f,   1e7f,   1e8f,   1e9f,   1e10f,  1e11f,
    1e12f,  1e13f,  1e14f,  1e15f,  1e16f,  1e17f,  1e18f,  1e19f,  1e20f,  1e21f,
    1e22f,  1e23f,  1e24f,  1e25f,  1e26f,  1e27f,  1e28f,  1e29f,  1e30f,  1e31f,
    1e32f,  1e33f,  1e34f,  1e35f,  1e36f,  1e37f,  1e38f};
static constexpr double kDoublePowersOfTen[2 * 38 + 1] = {
    1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28,
    1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17,
    1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9,  1e-8,  1e-7,  1e-6,
    1e-5,  1e-4,  1e-3,  1e-2,  1e-1,  1e0,   1e1,   1e2,   1e3,   1e4,   1e5,
    1e6,   1e7,   1e8,   1e9,   1e10,  1e11,  1e12,  1e13,  1e14,  1e15,  1e16,
    1e17,  1e18,  1e19,  1e20,  1e21,  1e22,  1e23,  1e24,  1e25,  1e26,  1e27,
    1e28,  1e29,  1e30,  1e31,  1e32,  1e33,  1e34,  1e35,  1e36,  1e37,  1e38};
// On the Windows R toolchain, INFINITY is double type instead of float
static constexpr float kFloatInf = std::numeric_limits<float>::infinity();
static constexpr float kFloatPowersOfTen76[2 * 76 + 1] = {
    0,         0,         0,         1e-45f,    1e-44f,    1e-43f,    1e-42f,
    1e-41f,    1e-40f,    1e-39f,    1e-38f,    1e-37f,    1e-36f,    1e-35f,
    1e-34f,    1e-33f,    1e-32f,    1e-31f,    1e-30f,    1e-29f,    1e-28f,
    1e-27f,    1e-26f,    1e-25f,    1e-24f,    1e-23f,    1e-22f,    1e-21f,
    1e-20f,    1e-19f,    1e-18f,    1e-17f,    1e-16f,    1e-15f,    1e-14f,
    1e-13f,    1e-12f,    1e-11f,    1e-10f,    1e-9f,     1e-8f,     1e-7f,
    1e-6f,     1e-5f,     1e-4f,     1e-3f,     1e-2f,     1e-1f,     1e0f,
    1e1f,      1e2f,      1e3f,      1e4f,      1e5f,      1e6f,      1e7f,
    1e8f,      1e9f,      1e10f,     1e11f,     1e12f,     1e13f,     1e14f,
    1e15f,     1e16f,     1e17f,     1e18f,     1e19f,     1e20f,     1e21f,
    1e22f,     1e23f,     1e24f,     1e25f,     1e26f,     1e27f,     1e28f,
    1e29f,     1e30f,     1e31f,     1e32f,     1e33f,     1e34f,     1e35f,
    1e36f,     1e37f,     1e38f,     kFloatInf, kFloatInf, kFloatInf, kFloatInf,
    kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
    kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
    kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
    kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
    kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf};
static constexpr double kDoublePowersOfTen76[2 * 76 + 1] = {
    1e-76, 1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65,
    1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56, 1e-55, 1e-54, 1e-53,
    1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41,
    1e-40, 1e-39, 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29,
    1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17,
    1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9,  1e-8,  1e-7,  1e-6,  1e-5,
    1e-4,  1e-3,  1e-2,  1e-1,  1e0,   1e1,   1e2,   1e3,   1e4,   1e5,   1e6,   1e7,
    1e8,   1e9,   1e10,  1e11,  1e12,  1e13,  1e14,  1e15,  1e16,  1e17,  1e18,  1e19,
    1e20,  1e21,  1e22,  1e23,  1e24,  1e25,  1e26,  1e27,  1e28,  1e29,  1e30,  1e31,
    1e32,  1e33,  1e34,  1e35,  1e36,  1e37,  1e38,  1e39,  1e40,  1e41,  1e42,  1e43,
    1e44,  1e45,  1e46,  1e47,  1e48,  1e49,  1e50,  1e51,  1e52,  1e53,  1e54,  1e55,
    1e56,  1e57,  1e58,  1e59,  1e60,  1e61,  1e62,  1e63,  1e64,  1e65,  1e66,  1e67,
    1e68,  1e69,  1e70,  1e71,  1e72,  1e73,  1e74,  1e75,  1e76};
namespace {
template <typename Real, typename Derived>
struct DecimalRealConversion {
  static Result<Decimal128> FromPositiveReal(Real real, int32_t precision,
    auto x = real;
    if (scale >= -38 && scale <= 38) {
      x *= Derived::powers_of_ten()[scale + 38];
    } else {
      x *= std::pow(static_cast<Real>(10), static_cast<Real>(scale));
    x = std::nearbyint(x);
    const auto max_abs = Derived::powers_of_ten()[precision + 38];
    if (x <= -max_abs || x >= max_abs) {
      return Status::Invalid("Cannot convert ", real,
                             " to Decimal128(precision = ", precision,
                             ", scale = ", scale, "): overflow");
    // Extract high and low bits
    const auto high = std::floor(std::ldexp(x, -64));
    const auto low = x - std::ldexp(high, 64);
    DCHECK_GE(high, -9.223372036854776e+18);  // -2**63
    DCHECK_LT(high, 9.223372036854776e+18);   // 2**63
    DCHECK_GE(low, 0);
    DCHECK_LT(low, 1.8446744073709552e+19);  // 2**64
    return Decimal128(static_cast<int64_t>(high), static_cast<uint64_t>(low));
  static Result<Decimal128> FromReal(Real x, int32_t precision, int32_t scale) {
    DCHECK_GT(precision, 0);
    DCHECK_LE(precision, 38);
    if (!std::isfinite(x)) {
      return Status::Invalid("Cannot convert ", x, " to Decimal128");
    if (x < 0) {
      ARROW_ASSIGN_OR_RAISE(auto dec, FromPositiveReal(-x, precision, scale));
      return dec.Negate();
    } else {
      // Includes negative zero
      return FromPositiveReal(x, precision, scale);
  static Real ToRealPositive(const Decimal128& decimal, int32_t scale) {
    Real x = static_cast<Real>(decimal.high_bits()) * Derived::two_to_64();
    x += static_cast<Real>(decimal.low_bits());
    if (scale >= -38 && scale <= 38) {
      x *= Derived::powers_of_ten()[-scale + 38];
    } else {
      x *= std::pow(static_cast<Real>(10), static_cast<Real>(-scale));
    return x;
  static Real ToReal(Decimal128 decimal, int32_t scale) {
    if (decimal.high_bits() < 0) {
      // Convert the absolute value to avoid precision loss
      decimal.Negate();
      return -ToRealPositive(decimal, scale);
    } else {
      return ToRealPositive(decimal, scale);
struct DecimalFloatConversion
    : public DecimalRealConversion<float, DecimalFloatConversion> {
  static constexpr const float* powers_of_ten() { return kFloatPowersOfTen; }
  static constexpr float two_to_64() { return 1.8446744e+19f; }
struct DecimalDoubleConversion
    : public DecimalRealConversion<double, DecimalDoubleConversion> {
  static constexpr const double* powers_of_ten() { return kDoublePowersOfTen; }
  static constexpr double two_to_64() { return 1.8446744073709552e+19; }
}  // namespace
Result<Decimal128> Decimal128::FromReal(float x, int32_t precision, int32_t scale) {
  return DecimalFloatConversion::FromReal(x, precision, scale);
Result<Decimal128> Decimal128::FromReal(double x, int32_t precision, int32_t scale) {
  return DecimalDoubleConversion::FromReal(x, precision, scale);
float Decimal128::ToFloat(int32_t scale) const {
  return DecimalFloatConversion::ToReal(*this, scale);
double Decimal128::ToDouble(int32_t scale) const {
  return DecimalDoubleConversion::ToReal(*this, scale);
template <size_t n>
static void AppendLittleEndianArrayToString(const std::array<uint64_t, n>& array,
                                            std::string* result) {
  const auto most_significant_non_zero =
      find_if(array.rbegin(), array.rend(), [](uint64_t v) { return v != 0; });
  if (most_significant_non_zero == array.rend()) {
    result->push_back('0');
    return;
  size_t most_significant_elem_idx = &*most_significant_non_zero - array.data();
  std::array<uint64_t, n> copy = array;
  constexpr uint32_t k1e9 = 1000000000U;
  constexpr size_t kNumBits = n * 64;
  // Segments will contain the array split into groups that map to decimal digits,
  // in little endian order. Each segment will hold at most 9 decimal digits.
  // For example, if the input represents 9876543210123456789, then segments will be
  // [123456789, 876543210, 9].
  // The max number of segments needed = ceil(kNumBits * log(2) / log(1e9))
  // = ceil(kNumBits / 29.897352854) <= ceil(kNumBits / 29).
  std::array<uint32_t, (kNumBits + 28) / 29> segments;
  size_t num_segments = 0;
  uint64_t* most_significant_elem = &copy[most_significant_elem_idx];
    // Compute remainder = copy % 1e9 and copy = copy / 1e9.
    uint32_t remainder = 0;
    uint64_t* elem = most_significant_elem;
      // Compute dividend = (remainder << 32) | *elem  (a virtual 96-bit integer);
      // *elem = dividend / 1e9;
      // remainder = dividend % 1e9.
      uint32_t hi = static_cast<uint32_t>(*elem >> 32);
      uint32_t lo = static_cast<uint32_t>(*elem & bit_util::LeastSignificantBitMask(32));
      uint64_t dividend_hi = (static_cast<uint64_t>(remainder) << 32) | hi;
      uint64_t quotient_hi = dividend_hi / k1e9;
      remainder = static_cast<uint32_t>(dividend_hi % k1e9);
      uint64_t dividend_lo = (static_cast<uint64_t>(remainder) << 32) | lo;
      uint64_t quotient_lo = dividend_lo / k1e9;
      remainder = static_cast<uint32_t>(dividend_lo % k1e9);
      *elem = (quotient_hi << 32) | quotient_lo;
    } while (elem-- != copy.data());
    segments[num_segments++] = remainder;
  } while (*most_significant_elem != 0 || most_significant_elem-- != copy.data());
  size_t old_size = result->size();
  size_t new_size = old_size + num_segments * 9;
  result->resize(new_size, '0');
  char* output = &result->at(old_size);
  const uint32_t* segment = &segments[num_segments - 1];
  internal::StringFormatter<UInt32Type> format;
  // First segment is formatted as-is.
  format(*segment, [&output](std::string_view formatted) {
    memcpy(output, formatted.data(), formatted.size());
    output += formatted.size();
  while (segment != segments.data()) {
    --segment;
    // Right-pad formatted segment such that e.g. 123 is formatted as "000000123".
    output += 9;
    format(*segment, [output](std::string_view formatted) {
      memcpy(output - formatted.size(), formatted.data(), formatted.size());
  result->resize(output - result->data());
std::string Decimal128::ToIntegerString() const {
  std::string result;
  if (high_bits() < 0) {
    result.push_back('-');
    Decimal128 abs = *this;
    abs.Negate();
    AppendLittleEndianArrayToString<2>(
        {abs.low_bits(), static_cast<uint64_t>(abs.high_bits())}, &result);
    AppendLittleEndianArrayToString<2>({low_bits(), static_cast<uint64_t>(high_bits())},
  return result;
Decimal128::operator int64_t() const {
  DCHECK(high_bits() == 0 || high_bits() == -1)
      << "Trying to cast a Decimal128 greater than the value range of a "
         "int64_t; high_bits() must be equal to 0 or -1, got: "
      << high_bits();
  return static_cast<int64_t>(low_bits());
static void AdjustIntegerStringWithScale(int32_t scale, std::string* str) {
  if (scale == 0) {
    return;
  DCHECK(str != nullptr);
  DCHECK(!str->empty());
  const bool is_negative = str->front() == '-';
  const auto is_negative_offset = static_cast<int32_t>(is_negative);
  const auto len = static_cast<int32_t>(str->size());
  const int32_t num_digits = len - is_negative_offset;
  const int32_t adjusted_exponent = num_digits - 1 - scale;
  /// Note that the -6 is taken from the Java BigDecimal documentation.
  if (scale < 0 || adjusted_exponent < -6) {
    // Example 1:
    // Precondition: *str = "123", is_negative_offset = 0, num_digits = 3, scale = -2,
    //               adjusted_exponent = 4
    // After inserting decimal point: *str = "1.23"
    // After appending exponent: *str = "1.23E+4"
    // Example 2:
    // Precondition: *str = "-123", is_negative_offset = 1, num_digits = 3, scale = 9,
    //               adjusted_exponent = -7
    // After inserting decimal point: *str = "-1.23"
    // After appending exponent: *str = "-1.23E-7"
    str->insert(str->begin() + 1 + is_negative_offset, '.');
    str->push_back('E');
    if (adjusted_exponent >= 0) {
      str->push_back('+');
    internal::StringFormatter<Int32Type> format;
    format(adjusted_exponent, [str](std::string_view formatted) {
      str->append(formatted.data(), formatted.size());
    return;
  if (num_digits > scale) {
    const auto n = static_cast<size_t>(len - scale);
    // Example 1:
    // Precondition: *str = "123", len = num_digits = 3, scale = 1, n = 2
    // After inserting decimal point: *str = "12.3"
    // Example 2:
    // Precondition: *str = "-123", len = 4, num_digits = 3, scale = 1, n = 3
    // After inserting decimal point: *str = "-12.3"
    str->insert(str->begin() + n, '.');
    return;
  // Example 1:
  // Precondition: *str = "123", is_negative_offset = 0, num_digits = 3, scale = 4
  // After insert: *str = "000123"
  // After setting decimal point: *str = "0.0123"
  // Example 2:
  // Precondition: *str = "-123", is_negative_offset = 1, num_digits = 3, scale = 4
  // After insert: *str = "-000123"
  // After setting decimal point: *str = "-0.0123"
  str->insert(is_negative_offset, scale - num_digits + 2, '0');
  str->at(is_negative_offset + 1) = '.';
std::string Decimal128::ToString(int32_t scale) const {
  if (ARROW_PREDICT_FALSE(scale < -kMaxScale || scale > kMaxScale)) {
    return "<scale out of range, cannot format Decimal128 value>";
  std::string str(ToIntegerString());
  AdjustIntegerStringWithScale(scale, &str);
  return str;
// Iterates over input and for each group of kInt64DecimalDigits multiple out by
// the appropriate power of 10 necessary to add source parsed as uint64 and
// then adds the parsed value of source.
static inline void ShiftAndAdd(std::string_view input, uint64_t out[], size_t out_size) {
  for (size_t posn = 0; posn < input.size();) {
    const size_t group_size = std::min(kInt64DecimalDigits, input.size() - posn);
    const uint64_t multiple = kUInt64PowersOfTen[group_size];
    uint64_t chunk = 0;
    ARROW_CHECK(
        internal::ParseValue<UInt64Type>(input.data() + posn, group_size, &chunk));
    for (size_t i = 0; i < out_size; ++i) {
      uint128_t tmp = out[i];
      tmp *= multiple;
      tmp += chunk;
      out[i] = static_cast<uint64_t>(tmp & 0xFFFFFFFFFFFFFFFFULL);
      chunk = static_cast<uint64_t>(tmp >> 64);
    posn += group_size;
namespace {
struct DecimalComponents {
  std::string_view whole_digits;
  std::string_view fractional_digits;
  int32_t exponent = 0;
  char sign = 0;
  bool has_exponent = false;
inline bool IsSign(char c) { return c == '-' || c == '+'; }
inline bool IsDot(char c) { return c == '.'; }
inline bool IsDigit(char c) { return c >= '0' && c <= '9'; }
inline bool StartsExponent(char c) { return c == 'e' || c == 'E'; }
inline size_t ParseDigitsRun(const char* s, size_t start, size_t size,
                             std::string_view* out) {
  size_t pos;
  for (pos = start; pos < size; ++pos) {
    if (!IsDigit(s[pos])) {
      break;
  *out = std::string_view(s + start, pos - start);
  return pos;
bool ParseDecimalComponents(const char* s, size_t size, DecimalComponents* out) {
  size_t pos = 0;
  if (size == 0) {
    return false;
  // Sign of the number
  if (IsSign(s[pos])) {
    out->sign = *(s + pos);
  // First run of digits
  pos = ParseDigitsRun(s, pos, size, &out->whole_digits);
  if (pos == size) {
    return !out->whole_digits.empty();
  // Optional dot (if given in fractional form)
  bool has_dot = IsDot(s[pos]);
  if (has_dot) {
    // Second run of digits
    pos = ParseDigitsRun(s, pos, size, &out->fractional_digits);
  if (out->whole_digits.empty() && out->fractional_digits.empty()) {
    // Need at least some digits (whole or fractional)
    return false;
  if (pos == size) {
    return true;
  // Optional exponent
  if (StartsExponent(s[pos])) {
    if (pos != size && s[pos] == '+') {
      ++pos;
    out->has_exponent = true;
    return internal::ParseValue<Int32Type>(s + pos, size - pos, &(out->exponent));
  return pos == size;
inline Status ToArrowStatus(DecimalStatus dstatus, int num_bits) {
  switch (dstatus) {
    case DecimalStatus::kSuccess:
      return Status::OK();
    case DecimalStatus::kDivideByZero:
      return Status::Invalid("Division by 0 in Decimal", num_bits);
    case DecimalStatus::kOverflow:
      return Status::Invalid("Overflow occurred during Decimal", num_bits, " operation.");
    case DecimalStatus::kRescaleDataLoss:
      return Status::Invalid("Rescaling Decimal", num_bits,
                             " value would cause data loss");
  return Status::OK();
template <typename Decimal>
Status DecimalFromString(const char* type_name, std::string_view s, Decimal* out,
                         int32_t* precision, int32_t* scale) {
  if (s.empty()) {
    return Status::Invalid("Empty string cannot be converted to ", type_name);
  DecimalComponents dec;
  if (!ParseDecimalComponents(s.data(), s.size(), &dec)) {
    return Status::Invalid("The string '", s, "' is not a valid ", type_name, " number");
  // Count number of significant digits (without leading zeros)
  size_t first_non_zero = dec.whole_digits.find_first_not_of('0');
  size_t significant_digits = dec.fractional_digits.size();
  if (first_non_zero != std::string::npos) {
    significant_digits += dec.whole_digits.size() - first_non_zero;
  int32_t parsed_precision = static_cast<int32_t>(significant_digits);
  int32_t parsed_scale = 0;
  if (dec.has_exponent) {
    auto adjusted_exponent = dec.exponent;
    parsed_scale =
        -adjusted_exponent + static_cast<int32_t>(dec.fractional_digits.size());
    parsed_scale = static_cast<int32_t>(dec.fractional_digits.size());
  if (out != nullptr) {
    static_assert(Decimal::kBitWidth % 64 == 0, "decimal bit-width not a multiple of 64");
    std::array<uint64_t, Decimal::kBitWidth / 64> little_endian_array{};
    ShiftAndAdd(dec.whole_digits, little_endian_array.data(), little_endian_array.size());
    ShiftAndAdd(dec.fractional_digits, little_endian_array.data(),
                little_endian_array.size());
    *out = Decimal(bit_util::little_endian::ToNative(little_endian_array));
    if (dec.sign == '-') {
      out->Negate();
  if (parsed_scale < 0) {
    // Force the scale to zero, to avoid negative scales (due to compatibility issues
    // with external systems such as databases)
    if (-parsed_scale > Decimal::kMaxScale) {
      return Status::Invalid("The string '", s, "' cannot be represented as ", type_name);
    if (out != nullptr) {
      *out *= Decimal::GetScaleMultiplier(-parsed_scale);
    parsed_precision -= parsed_scale;
    parsed_scale = 0;
  if (precision != nullptr) {
    *precision = parsed_precision;
  if (scale != nullptr) {
    *scale = parsed_scale;
  return Status::OK();
}  // namespace
Status Decimal128::FromString(std::string_view s, Decimal128* out, int32_t* precision,
                              int32_t* scale) {
  return DecimalFromString("decimal128", s, out, precision, scale);
Status Decimal128::FromString(const std::string& s, Decimal128* out, int32_t* precision,
                              int32_t* scale) {
  return FromString(std::string_view(s), out, precision, scale);
Status Decimal128::FromString(const char* s, Decimal128* out, int32_t* precision,
                              int32_t* scale) {
  return FromString(std::string_view(s), out, precision, scale);
Result<Decimal128> Decimal128::FromString(std::string_view s) {
  Decimal128 out;
  RETURN_NOT_OK(FromString(s, &out, nullptr, nullptr));
  return std::move(out);
Result<Decimal128> Decimal128::FromString(const std::string& s) {
  return FromString(std::string_view(s));
Result<Decimal128> Decimal128::FromString(const char* s) {
  return FromString(std::string_view(s));
// Helper function used by Decimal128::FromBigEndian
static inline uint64_t UInt64FromBigEndian(const uint8_t* bytes, int32_t length) {
  // We don't bounds check the length here because this is called by
  // FromBigEndian that has a Decimal128 as its out parameters and
  // that function is already checking the length of the bytes and only
  // passes lengths between zero and eight.
  uint64_t result = 0;
  // Using memcpy instead of special casing for length
  // and doing the conversion in 16, 32 parts, which could
  // possibly create unaligned memory access on certain platforms
  memcpy(reinterpret_cast<uint8_t*>(&result) + 8 - length, bytes, length);
  return ::arrow::bit_util::FromBigEndian(result);
Result<Decimal128> Decimal128::FromBigEndian(const uint8_t* bytes, int32_t length) {
  static constexpr int32_t kMinDecimalBytes = 1;
  static constexpr int32_t kMaxDecimalBytes = 16;
  int64_t high, low;
  if (ARROW_PREDICT_FALSE(length < kMinDecimalBytes || length > kMaxDecimalBytes)) {
    return Status::Invalid("Length of byte array passed to Decimal128::FromBigEndian ",
                           "was ", length, ", but must be between ", kMinDecimalBytes,
                           " and ", kMaxDecimalBytes);
  // Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the
  // sign bit.
  const bool is_negative = static_cast<int8_t>(bytes[0]) < 0;
  // 1. Extract the high bytes
  // Stop byte of the high bytes
  const int32_t high_bits_offset = std::max(0, length - 8);
  const auto high_bits = UInt64FromBigEndian(bytes, high_bits_offset);
  if (high_bits_offset == 8) {
    // Avoid undefined shift by 64 below
    high = high_bits;
    high = -1 * (is_negative && length < kMaxDecimalBytes);
    // Shift left enough bits to make room for the incoming int64_t
    high = SafeLeftShift(high, high_bits_offset * CHAR_BIT);
    // Preserve the upper bits by inplace OR-ing the int64_t
    high |= high_bits;
  // 2. Extract the low bytes
  // Stop byte of the low bytes
  const int32_t low_bits_offset = std::min(length, 8);
  const auto low_bits =
      UInt64FromBigEndian(bytes + high_bits_offset, length - high_bits_offset);
  if (low_bits_offset == 8) {
    // Avoid undefined shift by 64 below
    low = low_bits;
    // Sign extend the low bits if necessary
    low = -1 * (is_negative && length < 8);
    // Shift left enough bits to make room for the incoming int64_t
    low = SafeLeftShift(low, low_bits_offset * CHAR_BIT);
    // Preserve the upper bits by inplace OR-ing the int64_t
    low |= low_bits;
  return Decimal128(high, static_cast<uint64_t>(low));
Status Decimal128::ToArrowStatus(DecimalStatus dstatus) const {
  return arrow::ToArrowStatus(dstatus, 128);
std::ostream& operator<<(std::ostream& os, const Decimal128& decimal) {
  os << decimal.ToIntegerString();
  return os;
Decimal256::Decimal256(const std::string& str) : Decimal256() {
  *this = Decimal256::FromString(str).ValueOrDie();
std::string Decimal256::ToIntegerString() const {
  std::string result;
  if (IsNegative()) {
    result.push_back('-');
    Decimal256 abs = *this;
    abs.Negate();
    AppendLittleEndianArrayToString(
        bit_util::little_endian::FromNative(abs.native_endian_array()), &result);
    AppendLittleEndianArrayToString(
        bit_util::little_endian::FromNative(native_endian_array()), &result);
  return result;
std::string Decimal256::ToString(int32_t scale) const {
  if (ARROW_PREDICT_FALSE(scale < -kMaxScale || scale > kMaxScale)) {
    return "<scale out of range, cannot format Decimal256 value>";
  std::string str(ToIntegerString());
  AdjustIntegerStringWithScale(scale, &str);
  return str;
Status Decimal256::FromString(std::string_view s, Decimal256* out, int32_t* precision,
                              int32_t* scale) {
  return DecimalFromString("decimal256", s, out, precision, scale);
Status Decimal256::FromString(const std::string& s, Decimal256* out, int32_t* precision,
                              int32_t* scale) {
  return FromString(std::string_view(s), out, precision, scale);
Status Decimal256::FromString(const char* s, Decimal256* out, int32_t* precision,
                              int32_t* scale) {
  return FromString(std::string_view(s), out, precision, scale);
Result<Decimal256> Decimal256::FromString(std::string_view s) {
  Decimal256 out;
  RETURN_NOT_OK(FromString(s, &out, nullptr, nullptr));
  return std::move(out);
Result<Decimal256> Decimal256::FromString(const std::string& s) {
  return FromString(std::string_view(s));
Result<Decimal256> Decimal256::FromString(const char* s) {
  return FromString(std::string_view(s));
Result<Decimal256> Decimal256::FromBigEndian(const uint8_t* bytes, int32_t length) {
  static constexpr int32_t kMinDecimalBytes = 1;
  static constexpr int32_t kMaxDecimalBytes = 32;
  std::array<uint64_t, 4> little_endian_array;
  if (ARROW_PREDICT_FALSE(length < kMinDecimalBytes || length > kMaxDecimalBytes)) {
    return Status::Invalid("Length of byte array passed to Decimal128::FromBigEndian ",
                           "was ", length, ", but must be between ", kMinDecimalBytes,
                           " and ", kMaxDecimalBytes);
  // Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the
  // sign bit.
  const bool is_negative = static_cast<int8_t>(bytes[0]) < 0;
  for (int word_idx = 0; word_idx < 4; word_idx++) {
    const int32_t word_length = std::min(length, static_cast<int32_t>(sizeof(uint64_t)));
    if (word_length == 8) {
      // Full words can be assigned as is (and are UB with the shift below).
      little_endian_array[word_idx] =
          UInt64FromBigEndian(bytes + length - word_length, word_length);
    } else {
      // Sign extend the word its if necessary
      uint64_t word = -1 * is_negative;
      if (length > 0) {
        // Incorporate the actual values if present.
        // Shift left enough bits to make room for the incoming int64_t
        word = SafeLeftShift(word, word_length * CHAR_BIT);
        // Preserve the upper bits by inplace OR-ing the int64_t
        word |= UInt64FromBigEndian(bytes + length - word_length, word_length);
      little_endian_array[word_idx] = word;
    // Move on to the next word.
    length -= word_length;
  return Decimal256(bit_util::little_endian::ToNative(little_endian_array));
Status Decimal256::ToArrowStatus(DecimalStatus dstatus) const {
  return arrow::ToArrowStatus(dstatus, 256);
namespace {
template <typename Real, typename Derived>
struct Decimal256RealConversion {
  static Result<Decimal256> FromPositiveReal(Real real, int32_t precision,
    auto x = real;
    if (scale >= -76 && scale <= 76) {
      x *= Derived::powers_of_ten()[scale + 76];
    } else {
      x *= std::pow(static_cast<Real>(10), static_cast<Real>(scale));
    x = std::nearbyint(x);
    const auto max_abs = Derived::powers_of_ten()[precision + 76];
    if (x >= max_abs) {
      return Status::Invalid("Cannot convert ", real,
                             " to Decimal256(precision = ", precision,
                             ", scale = ", scale, "): overflow");
    // Extract parts
    const auto part3 = std::floor(std::ldexp(x, -192));
    x -= std::ldexp(part3, 192);
    const auto part2 = std::floor(std::ldexp(x, -128));
    x -= std::ldexp(part2, 128);
    const auto part1 = std::floor(std::ldexp(x, -64));
    x -= std::ldexp(part1, 64);
    const auto part0 = x;
    DCHECK_GE(part3, 0);
    DCHECK_LT(part3, 1.8446744073709552e+19);  // 2**64
    DCHECK_GE(part2, 0);
    DCHECK_LT(part2, 1.8446744073709552e+19);  // 2**64
    DCHECK_GE(part1, 0);
    DCHECK_LT(part1, 1.8446744073709552e+19);  // 2**64
    DCHECK_GE(part0, 0);
    DCHECK_LT(part0, 1.8446744073709552e+19);  // 2**64
    return Decimal256(bit_util::little_endian::ToNative<uint64_t, 4>(
        {static_cast<uint64_t>(part0), static_cast<uint64_t>(part1),
         static_cast<uint64_t>(part2), static_cast<uint64_t>(part3)}));
  static Result<Decimal256> FromReal(Real x, int32_t precision, int32_t scale) {
    DCHECK_GT(precision, 0);
    DCHECK_LE(precision, 76);
    if (!std::isfinite(x)) {
      return Status::Invalid("Cannot convert ", x, " to Decimal256");
    if (x < 0) {
      ARROW_ASSIGN_OR_RAISE(auto dec, FromPositiveReal(-x, precision, scale));
      return dec.Negate();
    } else {
      // Includes negative zero
      return FromPositiveReal(x, precision, scale);
  static Real ToRealPositive(const Decimal256& decimal, int32_t scale) {
    DCHECK_GE(decimal, 0);
    Real x = 0;
    const auto parts_le = bit_util::little_endian::Make(decimal.native_endian_array());
    x += Derived::two_to_192(static_cast<Real>(parts_le[3]));
    x += Derived::two_to_128(static_cast<Real>(parts_le[2]));
    x += Derived::two_to_64(static_cast<Real>(parts_le[1]));
    x += static_cast<Real>(parts_le[0]);
    if (scale >= -76 && scale <= 76) {
      x *= Derived::powers_of_ten()[-scale + 76];
    } else {
      x *= std::pow(static_cast<Real>(10), static_cast<Real>(-scale));
    return x;
  static Real ToReal(Decimal256 decimal, int32_t scale) {
    if (decimal.IsNegative()) {
      // Convert the absolute value to avoid precision loss
      decimal.Negate();
      return -ToRealPositive(decimal, scale);
    } else {
      return ToRealPositive(decimal, scale);
struct Decimal256FloatConversion
    : public Decimal256RealConversion<float, Decimal256FloatConversion> {
  static constexpr const float* powers_of_ten() { return kFloatPowersOfTen76; }
  static float two_to_64(float x) { return x * 1.8446744e+19f; }
  static float two_to_128(float x) { return x == 0 ? 0 : INFINITY; }
  static float two_to_192(float x) { return x == 0 ? 0 : INFINITY; }
struct Decimal256DoubleConversion
    : public Decimal256RealConversion<double, Decimal256DoubleConversion> {
  static constexpr const double* powers_of_ten() { return kDoublePowersOfTen76; }
  static double two_to_64(double x) { return x * 1.8446744073709552e+19; }
  static double two_to_128(double x) { return x * 3.402823669209385e+38; }
  static double two_to_192(double x) { return x * 6.277101735386681e+57; }
}  // namespace
Result<Decimal256> Decimal256::FromReal(float x, int32_t precision, int32_t scale) {
  return Decimal256FloatConversion::FromReal(x, precision, scale);
Result<Decimal256> Decimal256::FromReal(double x, int32_t precision, int32_t scale) {
  return Decimal256DoubleConversion::FromReal(x, precision, scale);
float Decimal256::ToFloat(int32_t scale) const {
  return Decimal256FloatConversion::ToReal(*this, scale);
double Decimal256::ToDouble(int32_t scale) const {
  return Decimal256DoubleConversion::ToReal(*this, scale);
std::ostream& operator<<(std::ostream& os, const Decimal256& decimal) {
  os << decimal.ToIntegerString();
  return os;
}  // namespace arrow
Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

decimal.cc

Latest commit

History

decimal.cc

File metadata and controls