1  // A relatively minimal unsigned 128-bit integer class type, used by the
       2  // floating-point std::to_chars implementation on targets that lack __int128.
       3  
       4  // Copyright (C) 2021-2023 Free Software Foundation, Inc.
       5  //
       6  // This file is part of the GNU ISO C++ Library.  This library is free
       7  // software; you can redistribute it and/or modify it under the
       8  // terms of the GNU General Public License as published by the
       9  // Free Software Foundation; either version 3, or (at your option)
      10  // any later version.
      11  
      12  // This library is distributed in the hope that it will be useful,
      13  // but WITHOUT ANY WARRANTY; without even the implied warranty of
      14  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      15  // GNU General Public License for more details.
      16  
      17  // Under Section 7 of GPL version 3, you are granted additional
      18  // permissions described in the GCC Runtime Library Exception, version
      19  // 3.1, as published by the Free Software Foundation.
      20  
      21  // You should have received a copy of the GNU General Public License and
      22  // a copy of the GCC Runtime Library Exception along with this program;
      23  // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      24  // <http://www.gnu.org/licenses/>.
      25  
      26  struct uint128_t
      27  {
      28  #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
      29    uint64_t lo, hi;
      30  #else
      31    uint64_t hi, lo;
      32  #endif
      33  
      34    uint128_t() = default;
      35  
      36    constexpr
      37    uint128_t(uint64_t lo, uint64_t hi = 0)
      38  #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
      39      : lo(lo), hi(hi)
      40  #else
      41      : hi(hi), lo(lo)
      42  #endif
      43    { }
      44  
      45    constexpr explicit
      46    operator bool() const
      47    { return *this != 0; }
      48  
      49    template<typename T, typename = std::enable_if_t<std::is_integral_v<T>>>
      50      constexpr explicit
      51      operator T() const
      52      {
      53        static_assert(sizeof(T) <= sizeof(uint64_t));
      54        return static_cast<T>(lo);
      55      }
      56  
      57    friend constexpr uint128_t
      58    operator&(uint128_t x, const uint128_t y)
      59    {
      60      x.lo &= y.lo;
      61      x.hi &= y.hi;
      62      return x;
      63    }
      64  
      65    friend constexpr uint128_t
      66    operator|(uint128_t x, const uint128_t y)
      67    {
      68      x.lo |= y.lo;
      69      x.hi |= y.hi;
      70      return x;
      71    }
      72  
      73    friend constexpr uint128_t
      74    operator<<(uint128_t x, const uint128_t y)
      75    {
      76      __glibcxx_assert(y < 128);
      77      // TODO: Convince GCC to use shldq on x86 here.
      78      if (y.lo >= 64)
      79        {
      80  	x.hi = x.lo << (y.lo - 64);
      81  	x.lo = 0;
      82        }
      83      else if (y.lo != 0)
      84        {
      85  	x.hi <<= y.lo;
      86  	x.hi |= x.lo >> (64 - y.lo);
      87  	x.lo <<= y.lo;
      88        }
      89      return x;
      90    }
      91  
      92    friend constexpr uint128_t
      93    operator>>(uint128_t x, const uint128_t y)
      94    {
      95      __glibcxx_assert(y < 128);
      96      // TODO: Convince GCC to use shrdq on x86 here.
      97      if (y.lo >= 64)
      98        {
      99  	x.lo = x.hi >> (y.lo - 64);
     100  	x.hi = 0;
     101        }
     102      else if (y.lo != 0)
     103        {
     104  	x.lo >>= y.lo;
     105  	x.lo |= x.hi << (64 - y.lo);
     106  	x.hi >>= y.lo;
     107        }
     108      return x;
     109    }
     110  
     111    constexpr uint128_t
     112    operator~() const
     113    { return {~lo, ~hi}; }
     114  
     115    constexpr uint128_t
     116    operator-() const
     117    { return operator~() + 1; }
     118  
     119    friend constexpr uint128_t
     120    operator+(uint128_t x, const uint128_t y)
     121    {
     122      x.hi += __builtin_add_overflow(x.lo, y.lo, &x.lo);
     123      x.hi += y.hi;
     124      return x;
     125    }
     126  
     127    friend constexpr uint128_t
     128    operator-(uint128_t x, const uint128_t y)
     129    {
     130      x.hi -= __builtin_sub_overflow(x.lo, y.lo, &x.lo);
     131      x.hi -= y.hi;
     132      return x;
     133    }
     134  
     135    static constexpr uint128_t
     136    umul64_64_128(const uint64_t x, const uint64_t y)
     137    {
     138      const uint64_t xl = x & 0xffffffff;
     139      const uint64_t xh = x >> 32;
     140      const uint64_t yl = y & 0xffffffff;
     141      const uint64_t yh = y >> 32;
     142      const uint64_t ll = xl * yl;
     143      const uint64_t lh = xl * yh;
     144      const uint64_t hl = xh * yl;
     145      const uint64_t hh = xh * yh;
     146      const uint64_t m = (ll >> 32) + lh + (hl & 0xffffffff);
     147      const uint64_t l = (ll & 0xffffffff ) | (m << 32);
     148      const uint64_t h = (m >> 32) + (hl >> 32) + hh;
     149      return {l, h};
     150    }
     151  
     152    friend constexpr uint128_t
     153    operator*(const uint128_t x, const uint128_t y)
     154    {
     155      uint128_t z = umul64_64_128(x.lo, y.lo);
     156      z.hi += x.lo * y.hi + x.hi * y.lo;
     157      return z;
     158    }
     159  
     160    friend constexpr uint128_t
     161    operator/(const uint128_t x, const uint128_t y)
     162    {
     163      // Ryu performs 128-bit division only by 5 and 10, so that's what we
     164      // implement.  The strategy here is to relate division of x with that of
     165      // x.hi and x.lo separately.
     166      __glibcxx_assert(y == 5 || y == 10);
     167      // The following implements division by 5 and 10.  In either case, we
     168      // first compute division by 5:
     169      //   x/5 = (x.hi*2^64 + x.lo)/5
     170      //       = (x.hi*(2^64-1) + x.hi + x.lo)/5
     171      //       = x.hi*((2^64-1)/5) + (x.hi + x.lo)/5 since CST=(2^64-1)/5 is exact
     172      //       = x.hi*CST + x.hi/5 + x.lo/5 + ((x.lo%5) + (x.hi%5) >= 5)
     173      // We go a step further and replace the last adjustment term with a
     174      // lookup table, which we encode as a binary literal.  This seems to
     175      // yield smaller code on x86 at least.
     176      constexpr auto cst = ~uint64_t(0) / 5;
     177      uint128_t q = uint128_t{x.hi}*cst + uint128_t{x.hi/5 + x.lo/5};
     178      constexpr auto lookup = 0b111100000u;
     179      q += (lookup >> ((x.hi % 5) + (x.lo % 5))) & 1;
     180      if (y == 10)
     181        q >>= 1;
     182      return q;
     183    }
     184  
     185    friend constexpr uint128_t
     186    operator%(const uint128_t x, const uint128_t y)
     187    {
     188      // Ryu performs 128-bit modulus only by 2, 5 and 10, so that's what we
     189      // implement.  The strategy here is to relate modulus of x with that of
     190      // x.hi and x.lo separately.
     191      if (y == 2)
     192        return x & 1;
     193      __glibcxx_assert(y == 5 || y == 10);
     194      // The following implements modulus by 5 and 10.  In either case,
     195      // we first compute modulus by 5:
     196      //   x (mod 5) = x.hi*2^64 + x.lo (mod 5)
     197      //             = x.hi + x.lo (mod 5) since 2^64 ≡ 1 (mod 5)
     198      // So the straightforward implementation would be
     199      //   ((x.hi % 5) + (x.lo % 5)) % 5
     200      // But we go a step further and replace the outermost % with a
     201      // lookup table:
     202      //             = {0,1,2,3,4,0,1,2,3}[(x.hi % 5) + (x.lo % 5)] (mod 5)
     203      // which we encode as an octal literal.
     204      constexpr auto lookup = 0321043210u;
     205      auto r = (lookup >> 3*((x.hi % 5) + (x.lo % 5))) & 7;
     206      if (y == 10)
     207        // x % 10 = (x % 5)      if x / 5 is even
     208        //          (x % 5) + 5  if x / 5 is odd
     209        // The compiler should be able to CSE the below computation of x/5 and
     210        // the above modulus operations with a nearby inlined computation of x/10.
     211        r += 5 * ((x/5).lo & 1);
     212      return r;
     213    }
     214  
     215    friend constexpr bool
     216    operator==(const uint128_t x, const uint128_t y)
     217    { return x.hi == y.hi && x.lo == y.lo; }
     218  
     219    friend constexpr bool
     220    operator<(const uint128_t x, const uint128_t y)
     221    { return x.hi < y.hi || (x.hi == y.hi && x.lo < y.lo); }
     222  
     223    friend constexpr auto
     224    __bit_width(const uint128_t x)
     225    {
     226      if (auto w = std::__bit_width(x.hi))
     227        return w + 64;
     228      else
     229        return std::__bit_width(x.lo);
     230    }
     231  
     232    friend constexpr auto
     233    __countr_zero(const uint128_t x)
     234    {
     235      auto c = std::__countr_zero(x.lo);
     236      if (c == 64)
     237        return 64 + std::__countr_zero(x.hi);
     238      else
     239        return c;
     240    }
     241  
     242    constexpr uint128_t&
     243    operator--()
     244    { return *this -= 1; }
     245  
     246    constexpr uint128_t&
     247    operator++()
     248    { return *this += 1; }
     249  
     250    constexpr uint128_t&
     251    operator+=(const uint128_t y)
     252    { return *this = *this + y; }
     253  
     254    constexpr uint128_t&
     255    operator-=(const uint128_t y)
     256    { return *this = *this - y; }
     257  
     258    constexpr uint128_t&
     259    operator*=(const uint128_t y)
     260    { return *this = *this * y; }
     261  
     262    constexpr uint128_t&
     263    operator<<=(const uint128_t y)
     264    { return *this = *this << y; }
     265  
     266    constexpr uint128_t&
     267    operator>>=(const uint128_t y)
     268    { return *this = *this >> y; }
     269  
     270    constexpr uint128_t&
     271    operator|=(const uint128_t y)
     272    { return *this = *this | y; }
     273  
     274    constexpr uint128_t&
     275    operator&=(const uint128_t y)
     276    { return *this = *this & y; }
     277  
     278    constexpr uint128_t&
     279    operator%=(const uint128_t y)
     280    { return *this = *this % y; }
     281  
     282    constexpr uint128_t&
     283    operator/=(const uint128_t y)
     284    { return *this = *this / y; }
     285  
     286    friend constexpr bool
     287    operator!=(const uint128_t x, const uint128_t y)
     288    { return !(x == y); }
     289  
     290    friend constexpr bool
     291    operator>(const uint128_t x, const uint128_t y)
     292    { return y < x; }
     293  
     294    friend constexpr bool
     295    operator>=(const uint128_t x, const uint128_t y)
     296    { return !(x < y); }
     297  };