1  /* PPU intrinsics as defined by the C/C++ Language extension for Cell BEA.
       2     Copyright (C) 2007-2023 Free Software Foundation, Inc.
       3  
       4     This file is free software; you can redistribute it and/or modify it under
       5     the terms of the GNU General Public License as published by the Free
       6     Software Foundation; either version 3 of the License, or (at your option)
       7     any later version.
       8  
       9     This file is distributed in the hope that it will be useful, but WITHOUT
      10     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
      11     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      12     for more details.
      13  
      14     Under Section 7 of GPL version 3, you are granted additional
      15     permissions described in the GCC Runtime Library Exception, version
      16     3.1, as published by the Free Software Foundation.
      17  
      18     You should have received a copy of the GNU General Public License and
      19     a copy of the GCC Runtime Library Exception along with this program;
      20     see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      21     <http://www.gnu.org/licenses/>.  */
      22  
      23  /*  TODO:
      24      misc ops (traps)
      25      supervisor/hypervisor mode ops.  */
      26  
      27  #ifndef  _PPU_INTRINSICS_H
      28  #define _PPU_INTRINSICS_H
      29  
      30  #if !defined(__PPU__) && !defined(__ppc__) && !defined(__ppc64__) \
      31      && !defined(__GNUC__)
      32    #error ppu_intrinsics.h included on wrong platform/compiler
      33  #endif
      34  
      35  #ifdef __cplusplus
      36  extern "C" {
      37  #endif 
      38  
      39  /*
      40   * unsigned int __cntlzw(unsigned int)
      41   * unsigned int __cntlzd(unsigned long long)
      42   * int __mulhw(int, int)
      43   * unsigned int __mulhwu(unsigned int, unsigned int)
      44   * long long __mulhd(long long, long long)
      45   * unsigned long long __mulhdu(unsigned long long, unsigned long long)
      46   *
      47   * void __sync(void)
      48   * void __isync(void)
      49   * void __lwsync(void)
      50   * void __eieio(void)
      51   *
      52   * void __nop(void)
      53   * void __cctpl(void)
      54   * void __cctpm(void)
      55   * void __cctph(void)
      56   * void __db8cyc(void)
      57   * void __db10cyc(void)
      58   * void __db12cyc(void)
      59   * void __db16cyc(void)
      60   *
      61   * void __mtspr(unsigned int spr, unsigned long long value)
      62   * unsigned long long __mfspr(unsigned int spr)
      63   * unsigned long long __mftb(void)
      64   *
      65   * void __icbi(void *base)
      66   * void __dcbi(void *base)
      67   *
      68   * void __dcbf(void *base)
      69   * void __dcbz(void *base)
      70   * void __dcbst(void *base)
      71   * void __dcbtst(void *base)
      72   * void __dcbt(void *base)
      73   * void __dcbt_TH1000(void *EATRUNC, bool D, bool UG, int ID)
      74   * void __dcbt_TH1010(bool GO, int S, int UNITCNT, bool T, bool U, int ID)
      75   *
      76   * unsigned __lwarx(void *base)
      77   * unsigned long long __ldarx(void *base)
      78   * bool __stwcx(void *base, unsigned value)
      79   * bool __stdcx(void *base, unsigned long long value)
      80   *
      81   * unsigned short __lhbrx(void *base)
      82   * unsigned int __lwbrx(void *base)
      83   * unsigned long long __ldbrx(void *base)
      84   * void __sthbrx(void *base, unsigned short value)
      85   * void __stwbrx(void *base, unsigned int value)
      86   * void __stdbrx(void *base, unsigned long long value)
      87   *
      88   * double __fabs(double x)
      89   * float __fabsf(float x)
      90   * double __fnabs(double x)
      91   * float __fnabsf(float x)
      92   * double __fmadd(double x, double y, double z)
      93   * double __fmsub(double x, double y, double z)
      94   * double __fnmadd(double x, double y, double z)
      95   * double __fnmsub(double x, double y, double z)
      96   * float __fmadds(float x, float y, float z)
      97   * float __fmsubs(float x, float y, float z)
      98   * float __fnmadds(float x, float y, float z)
      99   * float __fnmsubs(float x, float y, float z)
     100   * double __fsel(double x, double y, double z)
     101   * float __fsels(float x, float y, float z)
     102   * double __frsqrte(double x)
     103   * float __fres(float x)
     104   * double __fsqrt(double x)
     105   * float __fsqrts(float x)
     106   * long long __fctid(double x)
 * int __fctiw(double x)
     108   * double __fcfid(long long x)
     109   * double __mffs(void)
     110   * void __mtfsf(int mask, double value)
     111   * void __mtfsfi(int bits, int field)
     112   * void __mtfsb0(int)
     113   * void __mtfsb1(int)
     114   * double __setflm(double)
     115   *
     116   * dcbt intrinsics 
     117   * void __protected_unlimited_stream_set (unsigned int direction, const void *add, unsigned int ID)
     118   * void __protected_stream_set (unsigned int direction, const void *add, unsigned int ID)
     119   * void __protected_stream_stop_all (void)
     120   * void __protected_stream_stop (unsigned int ID)
     121   * void __protected_stream_count (unsigned int unit_cnt, unsigned int ID)
     122   * void __protected_stream_go (void)
     123   */
     124  
     125  typedef int __V4SI __attribute__((vector_size(16)));
     126  
     127  #define __cntlzw(v) __builtin_clz(v)
     128  #define __cntlzd(v) __builtin_clzll(v)
     129  
     130  #define __mulhw(a,b) __extension__ \
     131    ({int result;			   \
     132    __asm__ ("mulhw %0,%1,%2"	   \
     133  	   : "=r" (result)	   \
     134  	   : "r" ((int) (a)),	   \
     135  	     "r" ((int) (b)));	   \
     136    result; })
     137  
     138  #define __mulhwu(a,b) __extension__	\
     139    ({unsigned int result;		\
     140    __asm__ ("mulhwu %0,%1,%2"		\
     141  	   : "=r" (result)		\
     142  	   : "r" ((unsigned int) (a)),	\
     143  	     "r" ((unsigned int) (b))); \
     144    result; })
     145  
     146  #ifdef __powerpc64__
     147  #define __mulhd(a,b) __extension__   \
     148    ({ long long result;		     \
     149    __asm__ ("mulhd %0,%1,%2"	     \
     150  	   : "=r" (result)	     \
     151  	   : "r" ((long long) (a)),  \
     152  	     "r" ((long long) (b))); \
     153    result; })
     154  
     155  #define __mulhdu(a,b) __extension__	      \
     156    ({unsigned long long result;		      \
     157    __asm__ ("mulhdu %0,%1,%2"		      \
     158  	   : "=r" (result)		      \
     159  	   : "r" ((unsigned long long) (a)),  \
     160  	     "r" ((unsigned long long) (b))); \
     161    result; })
     162  #endif /* __powerpc64__ */
     163  
     164  #define __sync() __asm__ volatile ("sync" : : : "memory")
     165  #define __isync() __asm__ volatile ("isync" : : : "memory")
     166  #define __lwsync() __asm__ volatile ("lwsync" : : : "memory")
     167  #define __eieio() __asm__ volatile ("eieio" : : : "memory")
     168  
     169  #define __nop() __asm__ volatile ("ori 0,0,0" : : : "memory")
     170  #define __cctpl() __asm__ volatile ("or 1,1,1" : : : "memory")
     171  #define __cctpm() __asm__ volatile ("or 2,2,2" : : : "memory")
     172  #define __cctph() __asm__ volatile ("or 3,3,3" : : : "memory")
     173  #define __db8cyc() __asm__ volatile ("or 28,28,28" : : : "memory")
     174  #define __db10cyc() __asm__ volatile ("or 29,29,29" : : : "memory")
     175  #define __db12cyc() __asm__ volatile ("or 30,30,30" : : : "memory")
     176  #define __db16cyc() __asm__ volatile ("or 31,31,31" : : : "memory")
     177  
     178  #ifdef __powerpc64__
     179  #define __mtspr(spr, value) \
     180    __asm__ volatile ("mtspr %0,%1" : : "n" (spr), "r" (value))
     181    
     182  #define __mfspr(spr) __extension__				\
     183    ({ unsigned long long result;					\
     184    __asm__ volatile ("mfspr %0,%1" : "=r" (result) : "n" (spr)); \
     185    result; })
     186  #endif /* __powerpc64__ */
     187  
     188  #ifdef __powerpc64__
     189  /* Work around the hardware bug in the current Cell implementation.  */
     190  #define __mftb() __extension__					\
     191    ({ unsigned long long result;					\
     192    __asm__ volatile ("1: mftb %[current_tb]\n"			\
     193        "\tcmpwi 7, %[current_tb], 0\n"				\
     194        "\tbeq-  7, 1b"						\
     195        : [current_tb] "=r" (result):				\
     196        :"cr7");							\
     197    result; })
     198  #else
     199  #define __mftb() __extension__			\
     200    ({ unsigned long long result;			\
     201    unsigned long t;				\
     202    __asm__ volatile ("1:\n"			\
     203  		    "\tmftbu %0\n"		\
     204  		    "\tmftb %L0\n"		\
     205  		    "\tmftbu %1\n"		\
     206  		    "\tcmpw %0,%1\n"		\
     207  		    "\tbne 1b"			\
     208  		    : "=r" (result), "=r" (t));	\
     209    result; })
     210  #endif /* __powerpc64__ */
     211  
     212  #define __dcbf(base) \
     213    __asm__ volatile ("dcbf %y0" : "=Z" (*(__V4SI*) (base)) : : "memory")
     214    
     215  #define __dcbz(base) \
     216    __asm__ volatile ("dcbz %y0" : "=Z" (*(__V4SI*) (base)) : : "memory")
     217  
     218  #define __dcbst(base) \
     219    __asm__ volatile ("dcbst %y0" : "=Z" (*(__V4SI*) (base)) : : "memory")
     220  
     221  #define __dcbtst(base) \
     222    __asm__ volatile ("dcbtst %y0" : "=Z" (*(__V4SI*) (base)) : : "memory")
     223  
     224  #define __dcbt(base) \
     225    __asm__ volatile ("dcbt %y0" : "=Z" (*(__V4SI*) (base)) : : "memory")
     226  
     227  #define __icbi(base) \
     228    __asm__ volatile ("icbi %y0" : "=Z" (*(__V4SI*) (base)) : : "memory")
     229    
     230  #define __dcbt_TH1000(EATRUNC, D, UG, ID)				\
     231    __asm__ volatile ("dcbt %y0,8"					\
     232  	   : "=Z" (*(__V4SI*) (__SIZE_TYPE__)((((__SIZE_TYPE__) (EATRUNC)) & ~0x7F)	\
     233  	   		       | ((((D) & 1) << 6)			\
     234  	   		       | (((UG) & 1) << 5)			\
     235  	   		       | ((ID) & 0xF)))) : : "memory")
     236  
     237  #define __dcbt_TH1010(GO, S, UNITCNT, T, U, ID)			     \
     238    __asm__ volatile ("dcbt %y0,10"				     \
     239  	   : "=Z" (*(__V4SI*) (__SIZE_TYPE__)((((__SIZE_TYPE__) (GO) & 1) << 31) \
     240  	   		       | (((S) & 0x3) << 29)		     \
     241  	   		       | (((UNITCNT) & 0x3FF) << 7)	     \
     242  	   		       | (((T) & 1) << 6)			     \
     243  	   		       | (((U) & 1) << 5)			     \
     244  	   		       | ((ID) & 0xF))) : : "memory")
     245  
     246  #define __protected_unlimited_stream_set(DIRECTION, ADDR, ID)	\
     247  	__dcbt_TH1000 ((ADDR), (DIRECTION)>>1, 1, (ID))
     248  
     249  #define __protected_stream_set(DIRECTION, ADDR, ID)	\
     250  	__dcbt_TH1000 ((ADDR), (DIRECTION)>>1, 0, (ID))
     251  
     252  #define __protected_stream_stop_all()			\
     253  	__dcbt_TH1010 (0, 3, 0, 0, 0, 0)
     254  
     255  #define __protected_stream_stop(ID)			\
     256  	__dcbt_TH1010 (0, 2, 0, 0, 0, (ID))
     257  
     258  #define __protected_stream_count(COUNT, ID)		\
     259  	__dcbt_TH1010 (0, 0, (COUNT), 0, 0, (ID))
     260  
     261  #define __protected_stream_go()				\
     262  	__dcbt_TH1010 (1, 0, 0, 0, 0, 0)
     263  
     264  #define __lhbrx(base) __extension__		\
     265    ({unsigned short result;	       		\
     266      typedef  struct {char a[2];} halfwordsize;	\
     267      halfwordsize *ptrp = (halfwordsize*)(void*)(base);	\
     268    __asm__ ("lhbrx %0,%y1"			\
     269  	   : "=r" (result)			\
     270  	   : "Z" (*ptrp));			\
     271    result; })
     272  
     273  #define __lwbrx(base) __extension__		\
     274    ({unsigned int result;	       		\
     275      typedef  struct {char a[4];} wordsize;	\
     276      wordsize *ptrp = (wordsize*)(void*)(base);		\
     277    __asm__ ("lwbrx %0,%y1"			\
     278  	   : "=r" (result)			\
     279  	   : "Z" (*ptrp));			\
     280    result; })
     281  
     282  
     283  #ifdef __powerpc64__
     284  #define __ldbrx(base) __extension__			\
     285    ({unsigned long long result;	       			\
     286      typedef  struct {char a[8];} doublewordsize;	\
     287      doublewordsize *ptrp = (doublewordsize*)(void*)(base);	\
     288    __asm__ ("ldbrx %0,%y1"				\
     289  	   : "=r" (result)				\
     290  	   : "Z" (*ptrp));				\
     291    result; })
     292  #else
     293  #define __ldbrx(base) __extension__			\
     294    ({unsigned long long result;	       			\
     295      typedef  struct {char a[8];} doublewordsize;	\
     296      doublewordsize *ptrp = (doublewordsize*)(void*)(base);	\
     297    __asm__ ("lwbrx %L0,%y1\n"				\
     298  	   "\tlwbrx %0,%y2"				\
     299  	   : "=&r" (result)				\
     300  	   : "Z" (*ptrp), "Z" (*((char *) ptrp + 4)));	\
     301    result; })
     302  #endif /* __powerpc64__ */
     303  
     304  
     305  #define __sthbrx(base, value) do {			\
     306      typedef  struct {char a[2];} halfwordsize;		\
     307      halfwordsize *ptrp = (halfwordsize*)(void*)(base);		\
     308      __asm__ ("sthbrx %1,%y0"				\
     309  	   : "=Z" (*ptrp)				\
     310  	   : "r" (value));				\
     311     } while (0)
     312  
     313  #define __stwbrx(base, value) do {		\
     314      typedef  struct {char a[4];} wordsize;	\
     315      wordsize *ptrp = (wordsize*)(void*)(base);		\
     316      __asm__ ("stwbrx %1,%y0"			\
     317  	   : "=Z" (*ptrp)			\
     318  	   : "r" (value));			\
     319     } while (0)
     320  
     321  #ifdef __powerpc64__
     322  #define __stdbrx(base, value) do {			\
     323      typedef  struct {char a[8];} doublewordsize;	\
     324      doublewordsize *ptrp = (doublewordsize*)(void*)(base);	\
     325      __asm__ ("stdbrx %1,%y0"				\
     326  	   : "=Z" (*ptrp)				\
     327  	   : "r" (value));				\
     328     } while (0)
     329  #else
     330  #define __stdbrx(base, value) do {			\
     331      typedef  struct {char a[8];} doublewordsize;	\
     332      doublewordsize *ptrp = (doublewordsize*)(void*)(base);	\
     333      __asm__ ("stwbrx %L2,%y0\n"				\
     334  	     "\tstwbrx %2,%y1"				\
     335  	   : "=Z" (*ptrp), "=Z" (*((char *) ptrp + 4))	\
     336  	   : "r" (value));				\
     337     } while (0)
     338  #endif /* __powerpc64__ */
     339  
     340  
     341  #define __lwarx(base) __extension__		\
     342    ({unsigned int result;	       		\
     343      typedef  struct {char a[4];} wordsize;	\
     344      wordsize *ptrp = (wordsize*)(void*)(base);	\
     345    __asm__ volatile ("lwarx %0,%y1"		\
     346  	   : "=r" (result)			\
     347  	   : "Z" (*ptrp));			\
     348    result; })
     349  
     350  #ifdef __powerpc64__
     351  #define __ldarx(base) __extension__			\
     352    ({unsigned long long result;	       			\
     353      typedef  struct {char a[8];} doublewordsize;	\
     354      doublewordsize *ptrp = (doublewordsize*)(void*)(base);	\
     355    __asm__ volatile ("ldarx %0,%y1"			\
     356  	   : "=r" (result)				\
     357  	   : "Z" (*ptrp));				\
     358    result; })
     359  #endif /* __powerpc64__ */
     360  
     361  #define __stwcx(base, value) __extension__	\
     362    ({unsigned int result;			\
     363      typedef  struct {char a[4];} wordsize;	\
     364      wordsize *ptrp = (wordsize*)(void*)(base);	\
     365    __asm__ volatile ("stwcx. %2,%y1\n"		\
     366  	   "\tmfocrf %0,0x80"			\
     367  	   : "=r" (result),			\
     368  	     "=Z" (*ptrp)			\
     369  	   : "r" (value) : "cr0");		\
     370    ((result & 0x20000000) >> 29); })
     371  
     372  
     373  #ifdef __powerpc64__
     374  #define __stdcx(base, value) __extension__		\
     375    ({unsigned long long result;				\
     376      typedef  struct {char a[8];} doublewordsize;	\
     377      doublewordsize *ptrp = (doublewordsize*)(void*)(base);	\
     378    __asm__ volatile ("stdcx. %2,%y1\n"			\
     379  	   "\tmfocrf %0,0x80"				\
     380  	   : "=r" (result),				\
     381  	     "=Z" (*ptrp)				\
     382  	   : "r" (value) : "cr0");			\
     383    ((result & 0x20000000) >> 29); })
     384  #endif /* __powerpc64__ */
     385  
     386  #define __mffs() __extension__			\
     387    ({double result;				\
     388    __asm__ volatile ("mffs %0" : "=d" (result)); \
     389    result; })
     390  
     391  #define __mtfsf(mask,value) \
     392    __asm__ volatile ("mtfsf %0,%1" : : "n" (mask), "d" ((double) (value)))
     393    
     394  #define __mtfsfi(bits,field) \
     395    __asm__ volatile ("mtfsfi %0,%1" : : "n" (bits), "n" (field))
     396  
     397  #define __mtfsb0(bit) __asm__ volatile ("mtfsb0 %0" : : "n" (bit))
     398  #define __mtfsb1(bit) __asm__ volatile ("mtfsb1 %0" : : "n" (bit))
     399  
     400  #define __setflm(v) __extension__	      \
     401    ({double result;			      \
     402    __asm__ volatile ("mffs %0\n\tmtfsf 255,%1" \
     403  		    : "=&d" (result)	      \
     404  		    : "d" ((double) (v)));    \
     405    result; })
     406  
     407  /* __builtin_fabs may perform unnecessary rounding.  */
     408  
     409  /* Rename __fabs and __fabsf to work around internal prototypes defined 
     410     in bits/mathcalls.h with some glibc versions.  */ 
     411  #define __fabs __ppu_fabs 
     412  #define __fabsf __ppu_fabsf 
     413  
     414  static __inline__ double __fabs(double x) __attribute__((always_inline));
     415  static __inline__ double
     416  __fabs(double x)
     417  {
     418    double r;
     419    __asm__("fabs %0,%1" : "=d"(r) : "d"(x));
     420    return r;
     421  }
     422  
     423  static __inline__ float __fabsf(float x) __attribute__((always_inline));
     424  static __inline__ float
     425  __fabsf(float x)
     426  {
     427    float r;
     428    __asm__("fabs %0,%1" : "=f"(r) : "f"(x));
     429    return r;
     430  }
     431  
     432  static __inline__ double __fnabs(double x) __attribute__((always_inline));
     433  static __inline__ double
     434  __fnabs(double x)
     435  {
     436    double r;
     437    __asm__("fnabs %0,%1" : "=d"(r) : "d"(x));
     438    return r;
     439  }
     440  
     441  static __inline__ float __fnabsf(float x) __attribute__((always_inline));
     442  static __inline__ float
     443  __fnabsf(float x)
     444  {
     445    float r;
     446    __asm__("fnabs %0,%1" : "=f"(r) : "f"(x));
     447    return r;
     448  }
     449  
     450  static __inline__ double __fmadd(double x, double y, double z)
     451    __attribute__((always_inline));
     452  static __inline__ double
     453  __fmadd(double x, double y, double z)
     454  {
     455    double r;
     456    __asm__("fmadd %0,%1,%2,%3" : "=d"(r) : "d"(x),"d"(y),"d"(z));
     457    return r;
     458  }
     459  
     460  static __inline__ double __fmsub(double x, double y, double z)
     461    __attribute__((always_inline));
     462  static __inline__ double
     463  __fmsub(double x, double y, double z)
     464  {
     465    double r;
     466    __asm__("fmsub %0,%1,%2,%3" : "=d"(r) : "d"(x),"d"(y),"d"(z));
     467    return r;
     468  }
     469  
     470  static __inline__ double __fnmadd(double x, double y, double z)
     471    __attribute__((always_inline));
     472  static __inline__ double
     473  __fnmadd(double x, double y, double z)
     474  {
     475    double r;
     476    __asm__("fnmadd %0,%1,%2,%3" : "=d"(r) : "d"(x),"d"(y),"d"(z));
     477    return r;
     478  }
     479  
     480  static __inline__ double __fnmsub(double x, double y, double z)
     481    __attribute__((always_inline));
     482  static __inline__ double
     483  __fnmsub(double x, double y, double z)
     484  {
     485    double r;
     486    __asm__("fnmsub %0,%1,%2,%3" : "=d"(r) : "d"(x),"d"(y),"d"(z));
     487    return r;
     488  }
     489  
     490  static __inline__ float __fmadds(float x, float y, float z)
     491    __attribute__((always_inline));
     492  static __inline__ float
     493  __fmadds(float x, float y, float z)
     494  {
     495    float r;
     496    __asm__("fmadds %0,%1,%2,%3" : "=f"(r) : "f"(x),"f"(y),"f"(z));
     497    return r;
     498  }
     499  
     500  static __inline__ float __fmsubs(float x, float y, float z)
     501    __attribute__((always_inline));
     502  static __inline__ float
     503  __fmsubs(float x, float y, float z)
     504  {
     505    float r;
     506    __asm__("fmsubs %0,%1,%2,%3" : "=f"(r) : "f"(x),"f"(y),"f"(z));
     507    return r;
     508  }
     509  
     510  static __inline__ float __fnmadds(float x, float y, float z)
     511    __attribute__((always_inline));
     512  static __inline__ float
     513  __fnmadds(float x, float y, float z)
     514  {
     515    float r;
     516    __asm__("fnmadds %0,%1,%2,%3" : "=f"(r) : "f"(x),"f"(y),"f"(z));
     517    return r;
     518  }
     519  
     520  static __inline__ float __fnmsubs(float x, float y, float z)
     521    __attribute__((always_inline));
     522  static __inline__ float
     523  __fnmsubs(float x, float y, float z)
     524  {
     525    float r;
     526    __asm__("fnmsubs %0,%1,%2,%3" : "=f"(r) : "f"(x),"f"(y),"f"(z));
     527    return r;
     528  }
     529  
     530  static __inline__ double __fsel(double x, double y, double z)
     531    __attribute__((always_inline));
     532  static __inline__ double
     533  __fsel(double x, double y, double z)
     534  {
     535    double r;
     536    __asm__("fsel %0,%1,%2,%3" : "=d"(r) : "d"(x),"d"(y),"d"(z));
     537    return r;
     538  }
     539  
     540  static __inline__ float __fsels(float x, float y, float z)
     541    __attribute__((always_inline));
     542  static __inline__ float
     543  __fsels(float x, float y, float z)
     544  {
     545    float r;
     546    __asm__("fsel %0,%1,%2,%3" : "=f"(r) : "f"(x),"f"(y),"f"(z));
     547    return r;
     548  }
     549  
     550  static __inline__ double __frsqrte(double x) __attribute__((always_inline));
     551  static __inline__ double
     552  __frsqrte(double x)
     553  {
     554    double r;
     555    __asm__("frsqrte %0,%1" : "=d" (r) : "d" (x));
     556    return r;
     557  }
     558  
     559  static __inline__ float __fres(float x) __attribute__((always_inline));
     560  static __inline__ float
     561  __fres(float x)
     562  {
     563    float r;
     564    __asm__("fres %0,%1" : "=f"(r) : "f"(x));
     565    return r;
     566  }
     567  
     568  static __inline__ double __fsqrt(double x) __attribute__((always_inline));
     569  static __inline__ double
     570  __fsqrt(double x)
     571  {
     572    double r;
     573    __asm__("fsqrt %0,%1" : "=d"(r) : "d"(x));
     574    return r;
     575  }
     576  
     577  static __inline__ float __fsqrts(float x) __attribute__((always_inline));
     578  static __inline__ float
     579  __fsqrts(float x)
     580  {
     581    float r;
     582    __asm__("fsqrts %0,%1" : "=f"(r) : "f"(x));
     583    return r;
     584  }
     585  
     586  static __inline__ double __fmul (double a, double b) __attribute__ ((always_inline));
     587  static __inline__ double
     588  __fmul(double a, double b)
     589  {
     590    double d;
     591    __asm__ ("fmul %0,%1,%2" : "=d" (d) : "d" (a), "d" (b));
     592    return d;
     593  }
     594  
     595  static __inline__ float __fmuls (float a, float b) __attribute__ ((always_inline));
     596  static __inline__ float
     597  __fmuls (float a, float b)
     598  {
     599    float d;
     600    __asm__ ("fmuls %0,%1,%2" : "=d" (d) : "f" (a), "f" (b));
     601    return d;
     602  }
     603  
     604  static __inline__ float __frsp (float a) __attribute__ ((always_inline));
     605  static __inline__ float
     606  __frsp (float a)
     607  {
     608    float d;
     609    __asm__ ("frsp %0,%1" : "=d" (d) : "f" (a));
     610    return d;
     611  }
     612  
     613  static __inline__ double __fcfid (long long a) __attribute__((always_inline));
     614  static __inline__ double
     615  __fcfid (long long a)
     616  {
     617    double d;
     618    __asm__ ("fcfid %0,%1" : "=d" (d) : "d" (a));
     619    return d;
     620  }
     621  
     622  static __inline__ long long __fctid (double a) __attribute__ ((always_inline));
     623  static __inline__ long long
     624  __fctid (double a)
     625  {
     626    long long d;
     627    __asm__ ("fctid %0,%1" : "=d" (d) : "d" (a));
     628    return d;
     629  }
     630  
     631  static __inline__ long long __fctidz (double a) __attribute__ ((always_inline));
     632  static __inline__ long long
     633  __fctidz (double a)
     634  {
     635    long long d;
     636    __asm__ ("fctidz %0,%1" : "=d" (d) : "d" (a));
     637    return d;
     638  }
     639  
     640  static __inline__ int __fctiw (double a) __attribute__ ((always_inline));
     641  static __inline__ int
     642  __fctiw (double a)
     643  {
     644    unsigned long long d;
     645    __asm__ ("fctiw %0,%1" : "=d" (d) : "d" (a));
     646    return (int) d;
     647  }
     648  
     649  static __inline__ int __fctiwz (double a) __attribute__ ((always_inline));
     650  static __inline__ int
     651  __fctiwz (double a)
     652  {
     653    long long d;
     654    __asm__ ("fctiwz %0,%1" : "=d" (d) : "d" (a));
     655    return (int) d;
     656  }
     657  
     658  #ifdef __powerpc64__
     659  #define __rldcl(a,b,mb) __extension__ \
     660    ({ \
     661      unsigned long long d; \
     662      __asm__ ("rldcl %0,%1,%2,%3" : "=r" (d) : "r" (a), "r" (b), "i" (mb)); \
     663      d; \
     664    })
     665  
     666  #define __rldcr(a,b,me) __extension__ \
     667    ({ \
     668      unsigned long long d; \
     669      __asm__ ("rldcr %0,%1,%2,%3" : "=r" (d) : "r" (a), "r" (b), "i" (me)); \
     670      d; \
     671    })
     672  
     673  #define __rldic(a,sh,mb) __extension__ \
     674    ({ \
     675      unsigned long long d; \
     676      __asm__ ("rldic %0,%1,%2,%3" : "=r" (d) : "r" (a), "i" (sh), "i" (mb)); \
     677      d; \
     678    })
     679  
     680  #define __rldicl(a,sh,mb) __extension__ \
     681    ({ \
     682      unsigned long long d; \
     683      __asm__ ("rldicl %0,%1,%2,%3" : "=r" (d) : "r" (a), "i" (sh), "i" (mb)); \
     684      d; \
     685    })
     686  
     687  #define __rldicr(a,sh,me) __extension__ \
     688    ({ \
     689      unsigned long long d; \
     690      __asm__ ("rldicr %0,%1,%2,%3" : "=r" (d) : "r" (a), "i" (sh), "i" (me)); \
     691      d; \
     692    })
     693  
     694  #define __rldimi(a,b,sh,mb) __extension__ \
     695    ({ \
     696      unsigned long long d; \
     697      __asm__ ("rldimi %0,%1,%2,%3" : "=r" (d) : "r" (b), "i" (sh), "i" (mb), "0" (a)); \
     698      d; \
     699    })
     700  #endif /* __powerpc64__ */
     701  
     702  #define __rlwimi(a,b,sh,mb,me) __extension__ \
     703    ({ \
     704      unsigned int d; \
     705      __asm__ ("rlwimi %0,%1,%2,%3,%4" : "=r" (d) : "r" (b), "i" (sh), "i" (mb), "i" (me), "0" (a)); \
     706      d; \
     707    })
     708  
     709  #define __rlwinm(a,sh,mb,me) __extension__ \
     710    ({ \
     711      unsigned int d; \
     712      __asm__ ("rlwinm %0,%1,%2,%3,%4" : "=r" (d) : "r" (a), "i" (sh), "i" (mb), "i" (me)); \
     713      d; \
     714    })
     715  
     716  #define __rlwnm(a,b,mb,me) __extension__ \
     717    ({ \
     718      unsigned int d; \
     719      __asm__ ("rlwnm %0,%1,%2,%3,%4" : "=r" (d) : "r" (a), "r" (b), "i" (mb), "i" (me)); \
     720      d; \
     721    })
     722  
     723  #ifdef __cplusplus
     724  }
     725  #endif
     726  
     727  #endif /* _PPU_INTRINSICS_H */