1  // Copyright 2018 Ulf Adams
       2  //
       3  // The contents of this file may be used under the terms of the Apache License,
       4  // Version 2.0.
       5  //
       6  //    (See accompanying file LICENSE-Apache or copy at
       7  //     http://www.apache.org/licenses/LICENSE-2.0)
       8  //
       9  // Alternatively, the contents of this file may be used under the terms of
      10  // the Boost Software License, Version 1.0.
      11  //    (See accompanying file LICENSE-Boost or copy at
      12  //     https://www.boost.org/LICENSE_1_0.txt)
      13  //
      14  // Unless required by applicable law or agreed to in writing, this software
      15  // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
      16  // KIND, either express or implied.
      17  #ifndef RYU_F2S_INTRINSICS_H
      18  #define RYU_F2S_INTRINSICS_H
      19  
      20  // Defines RYU_32_BIT_PLATFORM if applicable.
      21  
// Table configuration for the float helpers defined later in this header.
#if defined(RYU_FLOAT_FULL_TABLE)

// NOTE(review): this branch is empty in this view. In this configuration the
// helpers below read FLOAT_POW5_INV_SPLIT and FLOAT_POW5_SPLIT, so those are
// presumably meant to be provided here (e.g. by an include) -- confirm.

#else

// NOTE(review): both arms of this conditional are empty in this view. The
// helpers below call double_computeInvPow5/double_computePow5 when
// RYU_OPTIMIZE_SIZE is defined and read DOUBLE_POW5_INV_SPLIT/DOUBLE_POW5_SPLIT
// otherwise, so the matching declarations are presumably expected here -- confirm.
#if defined(RYU_OPTIMIZE_SIZE)
#else
#endif
// Without the dedicated float tables, the float bit counts are derived from the
// double ones by subtracting 64. The helpers below only use element [1] of each
// double table entry (described there as the upper 64 bits), which presumably
// is what the 64-bit reduction accounts for.
#define FLOAT_POW5_INV_BITCOUNT (DOUBLE_POW5_INV_BITCOUNT - 64)
#define FLOAT_POW5_BITCOUNT (DOUBLE_POW5_BITCOUNT - 64)

#endif
      34  
      35  static inline uint32_t pow5factor_32(uint32_t value) {
      36    uint32_t count = 0;
      37    for (;;) {
      38      assert(value != 0);
      39      const uint32_t q = value / 5;
      40      const uint32_t r = value % 5;
      41      if (r != 0) {
      42        break;
      43      }
      44      value = q;
      45      ++count;
      46    }
      47    return count;
      48  }
      49  
      50  // Returns true if value is divisible by 5^p.
      51  static inline bool multipleOfPowerOf5_32(const uint32_t value, const uint32_t p) {
      52    return pow5factor_32(value) >= p;
      53  }
      54  
      55  // Returns true if value is divisible by 2^p.
      56  static inline bool multipleOfPowerOf2_32(const uint32_t value, const uint32_t p) {
      57    // __builtin_ctz doesn't appear to be faster here.
      58    return (value & ((1u << p) - 1)) == 0;
      59  }
      60  
      61  // It seems to be slightly faster to avoid uint128_t here, although the
      62  // generated code for uint128_t looks slightly nicer.
      63  static inline uint32_t mulShift32(const uint32_t m, const uint64_t factor, const int32_t shift) {
      64    assert(shift > 32);
      65  
      66    // The casts here help MSVC to avoid calls to the __allmul library
      67    // function.
      68    const uint32_t factorLo = (uint32_t)(factor);
      69    const uint32_t factorHi = (uint32_t)(factor >> 32);
      70    const uint64_t bits0 = (uint64_t)m * factorLo;
      71    const uint64_t bits1 = (uint64_t)m * factorHi;
      72  
      73  #if defined(RYU_32_BIT_PLATFORM)
      74    // On 32-bit platforms we can avoid a 64-bit shift-right since we only
      75    // need the upper 32 bits of the result and the shift value is > 32.
      76    const uint32_t bits0Hi = (uint32_t)(bits0 >> 32);
      77    uint32_t bits1Lo = (uint32_t)(bits1);
      78    uint32_t bits1Hi = (uint32_t)(bits1 >> 32);
      79    bits1Lo += bits0Hi;
      80    bits1Hi += (bits1Lo < bits0Hi);
      81    if (shift >= 64) {
      82      // s2f can call this with a shift value >= 64, which we have to handle.
      83      // This could now be slower than the !defined(RYU_32_BIT_PLATFORM) case.
      84      return (uint32_t)(bits1Hi >> (shift - 64));
      85    } else {
      86      const int32_t s = shift - 32;
      87      return (bits1Hi << (32 - s)) | (bits1Lo >> s);
      88    }
      89  #else // RYU_32_BIT_PLATFORM
      90    const uint64_t sum = (bits0 >> 32) + bits1;
      91    const uint64_t shiftedSum = sum >> (shift - 32);
      92    assert(shiftedSum <= UINT32_MAX);
      93    return (uint32_t) shiftedSum;
      94  #endif // RYU_32_BIT_PLATFORM
      95  }
      96  
      97  static inline uint32_t mulPow5InvDivPow2(const uint32_t m, const uint32_t q, const int32_t j) {
      98  #if defined(RYU_FLOAT_FULL_TABLE)
      99    return mulShift32(m, FLOAT_POW5_INV_SPLIT[q], j);
     100  #elif defined(RYU_OPTIMIZE_SIZE)
     101    // The inverse multipliers are defined as [2^x / 5^y] + 1; the upper 64 bits from the double lookup
     102    // table are the correct bits for [2^x / 5^y], so we have to add 1 here. Note that we rely on the
     103    // fact that the added 1 that's already stored in the table never overflows into the upper 64 bits.
     104    uint64_t pow5[2];
     105    double_computeInvPow5(q, pow5);
     106    return mulShift32(m, pow5[1] + 1, j);
     107  #else
     108    return mulShift32(m, DOUBLE_POW5_INV_SPLIT[q][1] + 1, j);
     109  #endif
     110  }
     111  
     112  static inline uint32_t mulPow5divPow2(const uint32_t m, const uint32_t i, const int32_t j) {
     113  #if defined(RYU_FLOAT_FULL_TABLE)
     114    return mulShift32(m, FLOAT_POW5_SPLIT[i], j);
     115  #elif defined(RYU_OPTIMIZE_SIZE)
     116    uint64_t pow5[2];
     117    double_computePow5(i, pow5);
     118    return mulShift32(m, pow5[1], j);
     119  #else
     120    return mulShift32(m, DOUBLE_POW5_SPLIT[i][1], j);
     121  #endif
     122  }
     123  
     124  #endif // RYU_F2S_INTRINSICS_H