1  /* Signed and unsigned multiplication and division and modulus for CRIS.
       2     Contributed by Axis Communications.
       3     Written by Hans-Peter Nilsson <hp@axis.se>, c:a 1992.
       4  
       5     Copyright (C) 1998-2023 Free Software Foundation, Inc.
       6  
       7  This file is part of GCC.
       8  
       9  GCC is free software; you can redistribute it and/or modify it
      10  under the terms of the GNU General Public License as published by the
      11  Free Software Foundation; either version 3, or (at your option) any
      12  later version.
      13  
      14  This file is distributed in the hope that it will be useful, but
      15  WITHOUT ANY WARRANTY; without even the implied warranty of
      16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      17  General Public License for more details.
      18  
      19  Under Section 7 of GPL version 3, you are granted additional
      20  permissions described in the GCC Runtime Library Exception, version
      21  3.1, as published by the Free Software Foundation.
      22  
      23  You should have received a copy of the GNU General Public License and
      24  a copy of the GCC Runtime Library Exception along with this program;
      25  see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      26  <http://www.gnu.org/licenses/>.  */
      27  
      28  
/* Note that we provide prototypes for all "const" functions, to attach
   the const attribute.  This is necessary with GCC 2.7.2, where adding
   the attribute to the function *definition* is a syntax error.
   This did not work with e.g. GCC 2.1; back then, the return type had
   to be "const".  */
      34  
      35  #include "config.h"
      36  
/* LZ (v) yields the number of leading zero bits in V by way of
   __builtin_clz.  It is only defined when __CRIS_arch_version is at
   least 3; the code below tests "#ifdef LZ" and falls back to shift or
   bit-scan loops when it is absent.  As for __builtin_clz itself, the
   result is undefined for V == 0.  */
#if defined (__CRIS_arch_version) && __CRIS_arch_version >= 3
#define LZ(v) __builtin_clz (v)
#endif
      40  
/* SIGNMULT (s, a) gives A with the sign S applied.  The users in this
   file pass -1 or 1 for S, for which the two variants below agree.

   In (at least) the 4.7 series, GCC doesn't automatically choose the
   best strategy, possibly related to insufficient modelling of
   delay-slot costs.  */
#if defined (__CRIS_arch_version) && __CRIS_arch_version >= 10
#define SIGNMULT(s, a) ((s) * (a)) /* Cheap multiplication, better than branch.  */
#else
#define SIGNMULT(s, a) ((s) < 0 ? -(a) : (a)) /* Branches are still better.  */
#endif
      49  
      50  #if defined (L_udivsi3) || defined (L_divsi3) || defined (L_umodsi3) \
      51      || defined (L_modsi3)
      52  /* Result type of divmod worker function.  */
      53  struct quot_rem
      54   {
      55     long quot;
      56     long rem;
      57   };
      58  
/* This is the worker function for div and mod.  It is inlined into the
   respective library function.

   A is the dividend; it must have bit 31 == 0 (the callers deal with
   that bit before calling).  B is the divisor.

   The result holds the quotient and the remainder of A / B.  Special
   cases visible in the code: for B == 0 both fields are set to
   0xffffffff, and for A < B the result is { 0, A } without any
   division steps being run.  */

static __inline__ struct quot_rem
do_31div (unsigned long a, unsigned long b)
     __attribute__ ((__const__, __always_inline__));

static __inline__ struct quot_rem
do_31div (unsigned long a, unsigned long b)
{
  /* Adjust operands and result if a is 31 bits.  EXTRA is the part of
     the quotient accounted for by that adjustment; it is added to the
     quotient produced by the division steps.  */
  long extra = 0;

  /* Net number of left shifts applied to b, which is also the number
     of division steps run and of quotient bits extracted below.  */
  int quot_digits = 0;

  /* Division by zero: return an all-ones pattern in both fields.  */
  if (b == 0)
    {
      struct quot_rem ret;
      ret.quot = 0xffffffff;
      ret.rem = 0xffffffff;
      return ret;
    }

  /* The divisor exceeds the dividend: quotient 0, remainder a.  */
  if (a < b)
    return (struct quot_rem) { 0, a };

  /* Normalize: shift b left until it is strictly greater than a,
     counting the shifts in quot_digits.  */
#ifdef LZ
  /* With a leading-zero count available, line up the highest set bits
     of a and b in one go, then shift one step further if b is still
     not greater than a.  (The test is always true here, as the a < b
     case returned above.)  */
  if (b <= a)
    {
      quot_digits = LZ (b) - LZ (a);
      quot_digits += (a >= (b << quot_digits));
      b <<= quot_digits;
    }
#else
  while (b <= a)
    {
      b <<= 1;
      quot_digits++;
    }
#endif

  /* Is a 31 bits?  Note that bit 31 is handled by the caller.  */
  if (a & 0x40000000)
    {
      /* Then make b's highest bit max 0x40000000, because it must have
	 been 0x80000000 to be 1 bit higher than a.  */
      b >>= 1;

      /* Adjust a to be maximum 0x3fffffff, i.e. two upper bits zero.  */
      if (a >= b)
	{
	  a -= b;
	  extra = 1 << (quot_digits - 1);
	}
      else
	{
	  /* NOTE(review): the normalization above stops at the first
	     shift that makes b greater than a, so after the b >>= 1
	     just done, a >= b appears to always hold and this branch
	     looks unreachable -- confirm.  */
	  a -= b >> 1;

	  /* Remember that we adjusted a by subtracting b * 2 ** Something.  */
	  extra = 1 << quot_digits;
	}

      /* The number of quotient digits will be one less, because
	 we just adjusted b.  */
      quot_digits--;
    }

  /* Now do the division part.  */

  /* Subtract b and add ones to the right when a >= b
     i.e. "a - (b - 1) == (a - b) + 1".  */
  b--;

  /* DS expands to one "dstep" instruction, with a as both input and
     output and b as the second operand, followed by a fallthrough
     marker for the case labels below.  NOTE(review): the semantics of
     dstep itself are not visible in this file; going by the comment
     above and the result extraction below, it presumably shifts a left
     one bit and subtracts b when possible -- confirm against the CRIS
     instruction set documentation.  */
#define DS __asm__ ("dstep %2,%0" : "=r" (a) : "0" (a), "r" (b)); \
 __attribute__ ((__fallthrough__))

  /* Enter the chain at the label matching quot_digits and fall through
     to case 0, so that exactly quot_digits division steps are run.  */
  switch (quot_digits)
    {
    case 32: DS; case 31: DS; case 30: DS; case 29: DS;
    case 28: DS; case 27: DS; case 26: DS; case 25: DS;
    case 24: DS; case 23: DS; case 22: DS; case 21: DS;
    case 20: DS; case 19: DS; case 18: DS; case 17: DS;
    case 16: DS; case 15: DS; case 14: DS; case 13: DS;
    case 12: DS; case 11: DS; case 10: DS; case 9: DS;
    case 8: DS; case 7: DS; case 6: DS; case 5: DS;
    case 4: DS; case 3: DS; case 2: DS; case 1: DS;
    case 0:;
    }

  /* The low quot_digits bits of a are taken as the quotient (plus
     extra from the adjustment above), and the bits above them as the
     remainder.  */
  {
    struct quot_rem ret;
    ret.quot = (a & ((1 << quot_digits) - 1)) + extra;
    ret.rem = a >> quot_digits;
    return ret;
  }
}
     154  
     155  #ifdef L_udivsi3
     156  unsigned long
     157  __Udiv (unsigned long a, unsigned long b) __attribute__ ((__const__));
     158  
     159  unsigned long
     160  __Udiv (unsigned long a, unsigned long b)
     161  {
     162    long extra = 0;
     163  
     164    /* Adjust operands and result, if a and/or b is 32 bits.  */
     165    /* Effectively: b & 0x80000000.  */
     166    if ((long) b < 0)
     167      return a >= b;
     168  
     169    /* Effectively: a & 0x80000000.  */
     170    if ((long) a < 0)
     171      {
     172        int tmp = 0;
     173  
     174        if (b == 0)
     175  	return 0xffffffff;
     176  #ifdef LZ
     177        tmp = LZ (b);
     178  #else
     179        for (tmp = 31; (((long) b & (1 << tmp)) == 0); tmp--)
     180  	;
     181  
     182        tmp = 31 - tmp;
     183  #endif
     184  
     185        if ((b << tmp) > a)
     186  	{
     187  	  extra = 1 << (tmp-1);
     188  	  a -= b << (tmp - 1);
     189  	}
     190        else
     191  	{
     192  	  extra = 1 << tmp;
     193  	  a -= b << tmp;
     194  	}
     195      }
     196  
     197    return do_31div (a, b).quot+extra;
     198  }
     199  #endif /* L_udivsi3 */
     200  
     201  #ifdef L_divsi3
/* Signed 32-bit division: return A / B.  The magnitude is computed by
   do_31div on the absolute values of the operands; the result is
   negated when exactly one of A and B is negative.  */
long
__Div (long a, long b) __attribute__ ((__const__));

long
__Div (long a, long b)
{
  /* Correction added to the final result; only set for
     a == 0x80000000.  */
  long extra = 0;

  /* Sign of the result: starts out as the sign of b and is flipped
     below if a is negative.  */
  long sign = (b < 0) ? -1 : 1;
  long res;

  /* We need to handle a == -2147483648 correctly, and while doing so
     must avoid producing a sequence like "abs (a) < 0", as GCC may
     optimize out such a test.  Such a sequence may not be obvious, as
     we call inline functions.  Testing for a being negative first
     (presumably much rarer than a being positive) lets us keep most of
     the penalty of the 0x80000000 special-case away from the common
     path.  */
  if (a < 0)
    {
      sign = -sign;

      if ((a & 0x7fffffff) == 0)
	{
	  /* We're at 0x80000000.  Tread carefully.  With the sign
	     already flipped, the subtraction below amounts to adding
	     |b| to a, which (for b != 0) makes the negation further
	     down representable.  That removes one |b| from the
	     dividend, so compensate by one unit of the result's sign
	     in the quotient.  */
	  a -= SIGNMULT (sign, b);
	  extra = sign;
	}
      a = -a;
    }

  res = do_31div (a, __builtin_labs (b)).quot;
  return SIGNMULT (sign, res) + extra;
}
     235  #endif /* L_divsi3 */
     236  
     237  
     238  #ifdef L_umodsi3
     239  unsigned long
     240  __Umod (unsigned long a, unsigned long b) __attribute__ ((__const__));
     241  
     242  unsigned long
     243  __Umod (unsigned long a, unsigned long b)
     244  {
     245    /* Adjust operands and result if a and/or b is 32 bits.  */
     246    if ((long) b < 0)
     247      return a >= b ? a - b : a;
     248  
     249    if ((long) a < 0)
     250      {
     251        int tmp = 0;
     252  
     253        if (b == 0)
     254  	return a;
     255  #ifdef LZ
     256        tmp = LZ (b);
     257  #else
     258        for (tmp = 31; (((long) b & (1 << tmp)) == 0); tmp--)
     259  	;
     260        tmp = 31 - tmp;
     261  #endif
     262  
     263        if ((b << tmp) > a)
     264  	{
     265  	  a -= b << (tmp - 1);
     266  	}
     267        else
     268  	{
     269  	  a -= b << tmp;
     270  	}
     271      }
     272  
     273    return do_31div (a, b).rem;
     274  }
     275  #endif /* L_umodsi3 */
     276  
     277  #ifdef L_modsi3
/* Signed 32-bit modulus: return the remainder of A / B.  The remainder
   is computed by do_31div on the absolute values of the operands and
   negated when A is negative; the sign of B does not affect the sign
   of the result.  */
long
__Mod (long a, long b) __attribute__ ((__const__));

long
__Mod (long a, long b)
{
  /* Sign applied to the result; set to -1 when a is negative.  */
  long sign = 1;
  long res;

  /* We need to handle a == -2147483648 correctly, and while doing so
     must avoid producing a sequence like "abs (a) < 0", as GCC may
     optimize out such a test.  Such a sequence may not be obvious, as
     we call inline functions.  Testing for a being negative first
     (presumably much rarer than a being positive) lets us keep most of
     the penalty of the 0x80000000 special-case away from the common
     path.  */
  if (a < 0)
    {
      sign = -1;
      if ((a & 0x7fffffff) == 0)
	/* We're at 0x80000000.  Tread carefully.  Adding |b| does not
	   change the remainder and (for b != 0) makes the negation
	   below representable.  */
	a += __builtin_labs (b);
      a = -a;
    }

  res = do_31div (a, __builtin_labs (b)).rem;
  return SIGNMULT (sign, res);
}
     306  #endif /* L_modsi3 */
     307  #endif /* L_udivsi3 || L_divsi3 || L_umodsi3 || L_modsi3 */
     308  
     309  /*
     310   * Local variables:
     311   * eval: (c-set-style "gnu")
     312   * indent-tabs-mode: t
     313   * End:
     314   */