gmp-6.3.0/mpn/sparc64/sparc64.h
/* UltraSPARC 64 support macros.

   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
   FUTURE GNU MP RELEASES.

Copyright 2003 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


#define LOW32(x)   ((x) & 0xFFFFFFFF)
#define HIGH32(x)  ((x) >> 32)


/* Halfword number i in src is accessed as src[i+HALF_ENDIAN_ADJ(i)].
   Plain src[i] would be incorrect in big endian; HALF_ENDIAN_ADJ has the
   effect of swapping the two halves within each limb in that case.  */
#if HAVE_LIMB_BIG_ENDIAN
#define HALF_ENDIAN_ADJ(i)  (1 - (((i) & 1) << 1))   /* +1 even, -1 odd */
#endif
#if HAVE_LIMB_LITTLE_ENDIAN
#define HALF_ENDIAN_ADJ(i)  0                        /* no adjust */
#endif
#ifndef HALF_ENDIAN_ADJ
Error, error, unknown limb endianness;
#endif
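
/* Illustrative sketch, not from the original file: fetching halfword i out
   of a limb vector through HALF_ENDIAN_ADJ, as described above.  Assumes
   64-bit limbs and that "unsigned" is 32 bits, as elsewhere in this file;
   the function name is hypothetical.  */
#if 0
static unsigned
example_get_halfword (const mp_limb_t *src_limbs, long i)
{
  const unsigned *src = (const unsigned *) src_limbs;
  return src[i + HALF_ENDIAN_ADJ (i)];
}
#endif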


/* umul_ppmm_lowequal sets h to the high limb of q*d, assuming the low limb
   of that product is equal to l.  dh and dl are the 32-bit halves of d.

   |-----high----||----low-----|
   +------+------+
   |             |                 ph = qh * dh
   +------+------+
          +------+------+
          |             |          pm1 = ql * dh
          +------+------+
          +------+------+
          |             |          pm2 = qh * dl
          +------+------+
                 +------+------+
                 |             |   pl = ql * dl (not calculated)
                 +------+------+

   Knowing that the low 64 bits of the product are equal to l means that
   LOW(pm1) + LOW(pm2) + HIGH(pl) == HIGH(l) (mod 2^32).  The only thing we
   need from those product parts is whether they produce a carry into the
   high.

   pm_l = LOW(pm1)+LOW(pm2) is calculated so that its carry can be added in;
   the only time there's a further carry from LOW(pm_l)+HIGH(pl) is when
   LOW(pm_l) > HIGH(l).  pl is never actually calculated.  */

#define umul_ppmm_lowequal(h, q, d, dh, dl, l)  \
  do {                                          \
    mp_limb_t  ql, qh, ph, pm1, pm2, pm_l;      \
    ASSERT (dh == HIGH32(d));                   \
    ASSERT (dl == LOW32(d));                    \
    ASSERT (q*d == l);                          \
                                                \
    ql = LOW32 (q);                             \
    qh = HIGH32 (q);                            \
                                                \
    pm1 = ql * dh;                              \
    pm2 = qh * dl;                              \
    ph  = qh * dh;                              \
                                                \
    pm_l = LOW32 (pm1) + LOW32 (pm2);           \
                                                \
    (h) = ph + HIGH32 (pm1) + HIGH32 (pm2)      \
      + HIGH32 (pm_l) + ((pm_l << 32) > l);     \
                                                \
    ASSERT_HIGH_PRODUCT (h, q, d);              \
  } while (0)
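
/* Usage sketch (illustrative only, not from the original source): the macro
   suits loops where the low limb l is already known and q has been formed
   from it, for instance q = l * dinv with dinv a precomputed inverse of d
   modulo 2^64 (d odd), so q*d has low limb l by construction and only the
   high limb is missing.  dinv and the surrounding variables here are
   hypothetical.  */
#if 0
  dh = HIGH32 (d);
  dl = LOW32 (d);
  q = l * dinv;                              /* low limb of q*d is l */
  umul_ppmm_lowequal (h, q, d, dh, dl, l);   /* h = high limb of q*d */
#endif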


/* Set h to the high limb of q*d, assuming the low limb of that product is
   equal to l, and that d fits in 32 bits.

   |-----high----||----low-----|
          +------+------+
          |             |          pm = qh * dl
          +------+------+
                 +------+------+
                 |             |   pl = ql * dl (not calculated)
                 +------+------+

   Knowing that LOW(pm) + HIGH(pl) == HIGH(l) (mod 2^32) means that the only
   time there's a carry from that sum is when LOW(pm) > HIGH(l).  There's no
   need to calculate pl to determine this.  */

#define umul_ppmm_half_lowequal(h, q, d, l)     \
  do {                                          \
    mp_limb_t pm;                               \
    ASSERT (q*d == l);                          \
    ASSERT (HIGH32(d) == 0);                    \
                                                \
    pm = HIGH32(q) * d;                         \
    (h) = HIGH32(pm) + ((pm << 32) > l);        \
    ASSERT_HIGH_PRODUCT (h, q, d);              \
  } while (0)
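
/* Worked example (illustrative, not from the original source) of the carry
   test (pm << 32) > l: with d = 0xFFFFFFFF and q = 0x1FFFFFFFF the product
   q*d is 1:FFFFFFFD00000001, so l = 0xFFFFFFFD00000001.  Then
   pm = HIGH32(q)*d = 0xFFFFFFFF, HIGH32(pm) = 0, and pm << 32 =
   0xFFFFFFFF00000000 > l, so the carry fires and h = 1 as required.  */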


/* check that h is the high limb of x*y */
#if WANT_ASSERT
#define ASSERT_HIGH_PRODUCT(h, x, y)    \
  do {                                  \
    mp_limb_t  want_h, dummy;           \
    umul_ppmm (want_h, dummy, x, y);    \
    ASSERT (h == want_h);               \
  } while (0)
#else
#define ASSERT_HIGH_PRODUCT(h, q, d)    \
  do { } while (0)
#endif


/* Multiply u and v, where v < 2^32.  */
#define umul_ppmm_s(w1, w0, u, v)                                       \
  do {                                                                  \
    UWtype __x0, __x2;                                                  \
    UWtype __ul, __vl, __uh;                                            \
    UWtype __u = (u), __v = (v);                                        \
                                                                        \
    __ul = __ll_lowpart (__u);                                          \
    __uh = __ll_highpart (__u);                                         \
    __vl = __ll_lowpart (__v);                                          \
                                                                        \
    __x0 = (UWtype) __ul * __vl;                                        \
    __x2 = (UWtype) __uh * __vl;                                        \
                                                                        \
    (w1) = (__x2 + (__x0 >> W_TYPE_SIZE/2)) >> W_TYPE_SIZE/2;           \
    (w0) = (__x2 << W_TYPE_SIZE/2) + __x0;                              \
  } while (0)
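
/* Sketch of what the macro computes (illustrative, not from the original
   source): with v < 2^32 only two 32x32 partial products are needed,
   u*v = (uh*vl)*2^32 + ul*vl, and w1:w0 is that full 128-bit product.  A
   plain umul_ppmm would produce the same w1:w0, just from more partial
   products, so the check below would hold.  */
#if 0
  umul_ppmm_s (w1, w0, u, v);             /* requires v <= 0xFFFFFFFF */
  umul_ppmm (check_w1, check_w0, u, v);
  ASSERT (check_w1 == w1 && check_w0 == w0);
#endif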

/* Count the leading zeros on a limb, but assuming it fits in 32 bits.
   The count returned will be in the range 32 to 63.
   This is the 32-bit generic C count_leading_zeros from longlong.h. */
#define count_leading_zeros_32(count, x)                                      \
  do {                                                                        \
    mp_limb_t  __xr = (x);                                                    \
    unsigned   __a;                                                           \
    ASSERT ((x) != 0);                                                        \
    ASSERT ((x) <= CNST_LIMB(0xFFFFFFFF));                                    \
    __a = __xr < ((UWtype) 1 << 16) ? (__xr < ((UWtype) 1 << 8) ? 1 : 8 + 1)  \
      : (__xr < ((UWtype) 1 << 24) ? 16 + 1 : 24 + 1);                        \
                                                                              \
    (count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a];                 \
  } while (0)
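
/* Worked examples (illustrative, not from the original source), taking
   W_TYPE_SIZE = 64 and the standard __clz_tab from longlong.h, where
   __clz_tab[v] is one more than the bit length of v:
     x = 1:           __a = 1,  count = 64+1-1-__clz_tab[0]    = 63
     x = 0xFFFFFFFF:  __a = 25, count = 64+1-25-__clz_tab[127] = 32
   The count is relative to the full 64-bit limb, hence the 32..63 range.  */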


/* Set inv to a 32-bit inverse floor((b*(b-d)-1) / d), where b = 2^32,
   knowing that d fits in 32 bits and is normalized (high bit set).  */
#define invert_half_limb(inv, d)                \
  do {                                          \
    mp_limb_t  _n;                              \
    ASSERT ((d) <= 0xFFFFFFFF);                 \
    ASSERT ((d) & 0x80000000);                  \
    _n = (((mp_limb_t) -(d)) << 32) - 1;        \
    (inv) = (mp_limb_t) (unsigned) (_n / (d));  \
  } while (0)
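
/* Worked example (illustrative, not from the original source), with b = 2^32:
     d = 0x80000000:  inv = floor ((b*(b-d)-1) / d) = 0xFFFFFFFF
     d = 0xFFFFFFFF:  inv = floor ((b*(b-d)-1) / d) = 1
   This is the style of reciprocal expected as dinv_limb by
   udiv_qrnnd_half_preinv below.  */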


/* Divide nh:nl by d, setting q to the quotient and r to the remainder.
   q, r, nh and nl are 32 bits each; d_limb is a 32-bit value held in an
   mp_limb_t, and dinv_limb is similarly a 32-bit inverse held in an
   mp_limb_t.  */

#define udiv_qrnnd_half_preinv(q, r, nh, nl, d_limb, dinv_limb)         \
  do {                                                                  \
    unsigned   _n2, _n10, _n1, _nadj, _q11n, _xh, _r, _q;               \
    mp_limb_t  _n, _x;                                                  \
    ASSERT (d_limb <= 0xFFFFFFFF);                                      \
    ASSERT (dinv_limb <= 0xFFFFFFFF);                                   \
    ASSERT (d_limb & 0x80000000);                                       \
    ASSERT (nh < d_limb);                                               \
    _n10 = (nl);                                                        \
    _n2 = (nh);                                                         \
    _n1 = (int) _n10 >> 31;                                             \
    _nadj = _n10 + (_n1 & d_limb);                                      \
    _x = dinv_limb * (_n2 - _n1) + _nadj;                               \
    _q11n = ~(_n2 + HIGH32 (_x));             /* -q1-1 */               \
    _n = ((mp_limb_t) _n2 << 32) + _n10;                                \
    _x = _n + d_limb * _q11n;                 /* n-q1*d-d */            \
    _xh = HIGH32 (_x) - d_limb;               /* high(n-q1*d-d) */      \
    ASSERT (_xh == 0 || _xh == ~0);                                     \
    _r = _x + (d_limb & _xh);                 /* addback */             \
    _q = _xh - _q11n;                         /* q1+1-addback */        \
    ASSERT (_r < d_limb);                                               \
    ASSERT (d_limb * _q + _r == _n);                                    \
    (r) = _r;                                                           \
    (q) = _q;                                                           \
  } while (0)
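
/* Usage sketch (illustrative, not from the original source): dividing a
   vector of 32-bit halfwords n[] by a normalized 32-bit d, high to low,
   propagating the remainder.  dinv comes from invert_half_limb above; the
   variable names are hypothetical.  The initial r = 0 satisfies the
   nh < d_limb requirement and each step keeps r < d.  */
#if 0
  invert_half_limb (dinv, d);
  r = 0;
  for (i = size - 1; i >= 0; i--)
    udiv_qrnnd_half_preinv (q[i], r, r, n[i], d, dinv);
#endif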