(root)/
gcc-13.2.0/
gcc/
config/
mips/
loongson-mmiintrin.h
       1  /* Intrinsics for Loongson MultiMedia extension Instructions operations.
       2  
       3     Copyright (C) 2008-2023 Free Software Foundation, Inc.
       4     Contributed by CodeSourcery.
       5  
       6     This file is part of GCC.
       7  
       8     GCC is free software; you can redistribute it and/or modify it
       9     under the terms of the GNU General Public License as published
      10     by the Free Software Foundation; either version 3, or (at your
      11     option) any later version.
      12  
      13     GCC is distributed in the hope that it will be useful, but WITHOUT
      14     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      15     or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
      16     License for more details.
      17  
      18     Under Section 7 of GPL version 3, you are granted additional
      19     permissions described in the GCC Runtime Library Exception, version
      20     3.1, as published by the Free Software Foundation.
      21  
      22     You should have received a copy of the GNU General Public License and
      23     a copy of the GCC Runtime Library Exception along with this program;
      24     see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      25     <http://www.gnu.org/licenses/>.  */
      26  
      27  #ifndef _GCC_LOONGSON_MMIINTRIN_H
      28  #define _GCC_LOONGSON_MMIINTRIN_H
      29  
      30  #if !defined(__mips_loongson_mmi)
      31  # error You must select -mloongson-mmi or -march=loongson2e/2f/3a to use\
      32   loongson-mmiintrin.h
      33  #endif
      34  
      35  #ifdef __cplusplus
      36  extern "C" {
      37  #endif
      38  
      39  #include <stdint.h>
      40  
      41  /* Vectors of unsigned bytes, halfwords and words.  */
      42  typedef uint8_t uint8x8_t __attribute__((vector_size (8)));
      43  typedef uint16_t uint16x4_t __attribute__((vector_size (8)));
      44  typedef uint32_t uint32x2_t __attribute__((vector_size (8)));
      45  
      46  /* Vectors of signed bytes, halfwords and words.  */
      47  typedef int8_t int8x8_t __attribute__((vector_size (8)));
      48  typedef int16_t int16x4_t __attribute__((vector_size (8)));
      49  typedef int32_t int32x2_t __attribute__((vector_size (8)));
      50  
      51  /* SIMD intrinsics.
      52     Unless otherwise noted, calls to the functions below will expand into
      53     precisely one machine instruction, modulo any moves required to
      54     satisfy register allocation constraints.  */
      55  
      56  /* Pack with signed saturation.  */
      57  __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
      58  packsswh (int32x2_t s, int32x2_t t)
      59  {
      60    return __builtin_loongson_packsswh (s, t);
      61  }
      62  
      63  __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
      64  packsshb (int16x4_t s, int16x4_t t)
      65  {
      66    return __builtin_loongson_packsshb (s, t);
      67  }
      68  
      69  /* Pack with unsigned saturation.  */
      70  __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
      71  packushb (uint16x4_t s, uint16x4_t t)
      72  {
      73    return __builtin_loongson_packushb (s, t);
      74  }
      75  
      76  /* Vector addition, treating overflow by wraparound.  */
      77  __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
      78  paddw_u (uint32x2_t s, uint32x2_t t)
      79  {
      80    return __builtin_loongson_paddw_u (s, t);
      81  }
      82  
      83  __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
      84  paddh_u (uint16x4_t s, uint16x4_t t)
      85  {
      86    return __builtin_loongson_paddh_u (s, t);
      87  }
      88  
      89  __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
      90  paddb_u (uint8x8_t s, uint8x8_t t)
      91  {
      92    return __builtin_loongson_paddb_u (s, t);
      93  }
      94  
      95  __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
      96  paddw_s (int32x2_t s, int32x2_t t)
      97  {
      98    return __builtin_loongson_paddw_s (s, t);
      99  }
     100  
     101  __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
     102  paddh_s (int16x4_t s, int16x4_t t)
     103  {
     104    return __builtin_loongson_paddh_s (s, t);
     105  }
     106  
     107  __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
     108  paddb_s (int8x8_t s, int8x8_t t)
     109  {
     110    return __builtin_loongson_paddb_s (s, t);
     111  }
     112  
     113  /* Addition of doubleword integers, treating overflow by wraparound.  */
     114  __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
     115  paddd_u (uint64_t s, uint64_t t)
     116  {
     117    return __builtin_loongson_paddd_u (s, t);
     118  }
     119  
     120  __extension__ static __inline int64_t __attribute__ ((__always_inline__))
     121  paddd_s (int64_t s, int64_t t)
     122  {
     123    return __builtin_loongson_paddd_s (s, t);
     124  }
     125  
     126  /* Vector addition, treating overflow by signed saturation.  */
     127  __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
     128  paddsh (int16x4_t s, int16x4_t t)
     129  {
     130    return __builtin_loongson_paddsh (s, t);
     131  }
     132  
     133  __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
     134  paddsb (int8x8_t s, int8x8_t t)
     135  {
     136    return __builtin_loongson_paddsb (s, t);
     137  }
     138  
     139  /* Vector addition, treating overflow by unsigned saturation.  */
     140  __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
     141  paddush (uint16x4_t s, uint16x4_t t)
     142  {
     143    return __builtin_loongson_paddush (s, t);
     144  }
     145  
     146  __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
     147  paddusb (uint8x8_t s, uint8x8_t t)
     148  {
     149    return __builtin_loongson_paddusb (s, t);
     150  }
     151  
     152  /* Logical AND NOT.  */
     153  __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
     154  pandn_ud (uint64_t s, uint64_t t)
     155  {
     156    return __builtin_loongson_pandn_ud (s, t);
     157  }
     158  
     159  __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
     160  pandn_uw (uint32x2_t s, uint32x2_t t)
     161  {
     162    return __builtin_loongson_pandn_uw (s, t);
     163  }
     164  
     165  __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
     166  pandn_uh (uint16x4_t s, uint16x4_t t)
     167  {
     168    return __builtin_loongson_pandn_uh (s, t);
     169  }
     170  
     171  __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
     172  pandn_ub (uint8x8_t s, uint8x8_t t)
     173  {
     174    return __builtin_loongson_pandn_ub (s, t);
     175  }
     176  
     177  __extension__ static __inline int64_t __attribute__ ((__always_inline__))
     178  pandn_sd (int64_t s, int64_t t)
     179  {
     180    return __builtin_loongson_pandn_sd (s, t);
     181  }
     182  
     183  __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
     184  pandn_sw (int32x2_t s, int32x2_t t)
     185  {
     186    return __builtin_loongson_pandn_sw (s, t);
     187  }
     188  
     189  __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
     190  pandn_sh (int16x4_t s, int16x4_t t)
     191  {
     192    return __builtin_loongson_pandn_sh (s, t);
     193  }
     194  
     195  __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
     196  pandn_sb (int8x8_t s, int8x8_t t)
     197  {
     198    return __builtin_loongson_pandn_sb (s, t);
     199  }
     200  
     201  /* Average.  */
     202  __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
     203  pavgh (uint16x4_t s, uint16x4_t t)
     204  {
     205    return __builtin_loongson_pavgh (s, t);
     206  }
     207  
     208  __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
     209  pavgb (uint8x8_t s, uint8x8_t t)
     210  {
     211    return __builtin_loongson_pavgb (s, t);
     212  }
     213  
     214  /* Equality test.  */
     215  __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
     216  pcmpeqw_u (uint32x2_t s, uint32x2_t t)
     217  {
     218    return __builtin_loongson_pcmpeqw_u (s, t);
     219  }
     220  
     221  __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
     222  pcmpeqh_u (uint16x4_t s, uint16x4_t t)
     223  {
     224    return __builtin_loongson_pcmpeqh_u (s, t);
     225  }
     226  
     227  __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
     228  pcmpeqb_u (uint8x8_t s, uint8x8_t t)
     229  {
     230    return __builtin_loongson_pcmpeqb_u (s, t);
     231  }
     232  
     233  __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
     234  pcmpeqw_s (int32x2_t s, int32x2_t t)
     235  {
     236    return __builtin_loongson_pcmpeqw_s (s, t);
     237  }
     238  
     239  __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
     240  pcmpeqh_s (int16x4_t s, int16x4_t t)
     241  {
     242    return __builtin_loongson_pcmpeqh_s (s, t);
     243  }
     244  
     245  __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
     246  pcmpeqb_s (int8x8_t s, int8x8_t t)
     247  {
     248    return __builtin_loongson_pcmpeqb_s (s, t);
     249  }
     250  
     251  /* Greater-than test.  */
     252  __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
     253  pcmpgtw_u (uint32x2_t s, uint32x2_t t)
     254  {
     255    return __builtin_loongson_pcmpgtw_u (s, t);
     256  }
     257  
     258  __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
     259  pcmpgth_u (uint16x4_t s, uint16x4_t t)
     260  {
     261    return __builtin_loongson_pcmpgth_u (s, t);
     262  }
     263  
     264  __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
     265  pcmpgtb_u (uint8x8_t s, uint8x8_t t)
     266  {
     267    return __builtin_loongson_pcmpgtb_u (s, t);
     268  }
     269  
     270  __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
     271  pcmpgtw_s (int32x2_t s, int32x2_t t)
     272  {
     273    return __builtin_loongson_pcmpgtw_s (s, t);
     274  }
     275  
     276  __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
     277  pcmpgth_s (int16x4_t s, int16x4_t t)
     278  {
     279    return __builtin_loongson_pcmpgth_s (s, t);
     280  }
     281  
     282  __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
     283  pcmpgtb_s (int8x8_t s, int8x8_t t)
     284  {
     285    return __builtin_loongson_pcmpgtb_s (s, t);
     286  }
     287  
     288  /* Extract halfword.  */
     289  __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
     290  pextrh_u (uint16x4_t s, int field /* 0--3.  */)
     291  {
     292    return __builtin_loongson_pextrh_u (s, field);
     293  }
     294  
     295  __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
     296  pextrh_s (int16x4_t s, int field /* 0--3.  */)
     297  {
     298    return __builtin_loongson_pextrh_s (s, field);
     299  }
     300  
     301  /* Insert halfword.  */
     302  __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
     303  pinsrh_0_u (uint16x4_t s, uint16x4_t t)
     304  {
     305    return __builtin_loongson_pinsrh_0_u (s, t);
     306  }
     307  
     308  __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
     309  pinsrh_1_u (uint16x4_t s, uint16x4_t t)
     310  {
     311    return __builtin_loongson_pinsrh_1_u (s, t);
     312  }
     313  
     314  __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
     315  pinsrh_2_u (uint16x4_t s, uint16x4_t t)
     316  {
     317    return __builtin_loongson_pinsrh_2_u (s, t);
     318  }
     319  
     320  __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
     321  pinsrh_3_u (uint16x4_t s, uint16x4_t t)
     322  {
     323    return __builtin_loongson_pinsrh_3_u (s, t);
     324  }
     325  
     326  __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
     327  pinsrh_0_s (int16x4_t s, int16x4_t t)
     328  {
     329    return __builtin_loongson_pinsrh_0_s (s, t);
     330  }
     331  
     332  __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
     333  pinsrh_1_s (int16x4_t s, int16x4_t t)
     334  {
     335    return __builtin_loongson_pinsrh_1_s (s, t);
     336  }
     337  
     338  __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
     339  pinsrh_2_s (int16x4_t s, int16x4_t t)
     340  {
     341    return __builtin_loongson_pinsrh_2_s (s, t);
     342  }
     343  
     344  __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
     345  pinsrh_3_s (int16x4_t s, int16x4_t t)
     346  {
     347    return __builtin_loongson_pinsrh_3_s (s, t);
     348  }
     349  
     350  /* Multiply and add.  */
     351  __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
     352  pmaddhw (int16x4_t s, int16x4_t t)
     353  {
     354    return __builtin_loongson_pmaddhw (s, t);
     355  }
     356  
     357  /* Maximum of signed halfwords.  */
     358  __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
     359  pmaxsh (int16x4_t s, int16x4_t t)
     360  {
     361    return __builtin_loongson_pmaxsh (s, t);
     362  }
     363  
     364  /* Maximum of unsigned bytes.  */
     365  __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
     366  pmaxub (uint8x8_t s, uint8x8_t t)
     367  {
     368    return __builtin_loongson_pmaxub (s, t);
     369  }
     370  
     371  /* Minimum of signed halfwords.  */
     372  __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
     373  pminsh (int16x4_t s, int16x4_t t)
     374  {
     375    return __builtin_loongson_pminsh (s, t);
     376  }
     377  
     378  /* Minimum of unsigned bytes.  */
     379  __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
     380  pminub (uint8x8_t s, uint8x8_t t)
     381  {
     382    return __builtin_loongson_pminub (s, t);
     383  }
     384  
     385  /* Move byte mask.  */
     386  __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
     387  pmovmskb_u (uint8x8_t s)
     388  {
     389    return __builtin_loongson_pmovmskb_u (s);
     390  }
     391  
     392  __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
     393  pmovmskb_s (int8x8_t s)
     394  {
     395    return __builtin_loongson_pmovmskb_s (s);
     396  }
     397  
     398  /* Multiply unsigned integers and store high result.  */
     399  __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
     400  pmulhuh (uint16x4_t s, uint16x4_t t)
     401  {
     402    return __builtin_loongson_pmulhuh (s, t);
     403  }
     404  
     405  /* Multiply signed integers and store high result.  */
     406  __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
     407  pmulhh (int16x4_t s, int16x4_t t)
     408  {
     409    return __builtin_loongson_pmulhh (s, t);
     410  }
     411  
     412  /* Multiply signed integers and store low result.  */
     413  __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
     414  pmullh (int16x4_t s, int16x4_t t)
     415  {
     416    return __builtin_loongson_pmullh (s, t);
     417  }
     418  
     419  /* Multiply unsigned word integers.  */
     420  __extension__ static __inline int64_t __attribute__ ((__always_inline__))
     421  pmuluw (uint32x2_t s, uint32x2_t t)
     422  {
     423    return __builtin_loongson_pmuluw (s, t);
     424  }
     425  
     426  /* Absolute difference.  */
     427  __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
     428  pasubub (uint8x8_t s, uint8x8_t t)
     429  {
     430    return __builtin_loongson_pasubub (s, t);
     431  }
     432  
     433  /* Sum of unsigned byte integers.  */
     434  __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
     435  biadd (uint8x8_t s)
     436  {
     437    return __builtin_loongson_biadd (s);
     438  }
     439  
     440  /* Sum of absolute differences.
     441     Note that this intrinsic expands into two machine instructions:
     442     PASUBUB followed by BIADD.  */
     443  __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
     444  psadbh (uint8x8_t s, uint8x8_t t)
     445  {
     446    return __builtin_loongson_psadbh (s, t);
     447  }
     448  
     449  /* Shuffle halfwords.  */
     450  __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
     451  pshufh_u (uint16x4_t dest, uint16x4_t s, uint8_t order)
     452  {
     453    return __builtin_loongson_pshufh_u (s, order);
     454  }
     455  
     456  __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
     457  pshufh_s (int16x4_t dest, int16x4_t s, uint8_t order)
     458  {
     459    return __builtin_loongson_pshufh_s (s, order);
     460  }
     461  
     462  /* Shift left logical.  */
     463  __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
     464  psllh_u (uint16x4_t s, uint8_t amount)
     465  {
     466    return __builtin_loongson_psllh_u (s, amount);
     467  }
     468  
     469  __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
     470  psllh_s (int16x4_t s, uint8_t amount)
     471  {
     472    return __builtin_loongson_psllh_s (s, amount);
     473  }
     474  
     475  __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
     476  psllw_u (uint32x2_t s, uint8_t amount)
     477  {
     478    return __builtin_loongson_psllw_u (s, amount);
     479  }
     480  
     481  __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
     482  psllw_s (int32x2_t s, uint8_t amount)
     483  {
     484    return __builtin_loongson_psllw_s (s, amount);
     485  }
     486  
     487  /* Shift right logical.  */
     488  __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
     489  psrlh_u (uint16x4_t s, uint8_t amount)
     490  {
     491    return __builtin_loongson_psrlh_u (s, amount);
     492  }
     493  
     494  __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
     495  psrlh_s (int16x4_t s, uint8_t amount)
     496  {
     497    return __builtin_loongson_psrlh_s (s, amount);
     498  }
     499  
     500  __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
     501  psrlw_u (uint32x2_t s, uint8_t amount)
     502  {
     503    return __builtin_loongson_psrlw_u (s, amount);
     504  }
     505  
     506  __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
     507  psrlw_s (int32x2_t s, uint8_t amount)
     508  {
     509    return __builtin_loongson_psrlw_s (s, amount);
     510  }
     511  
     512  /* Shift right arithmetic.  */
     513  __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
     514  psrah_u (uint16x4_t s, uint8_t amount)
     515  {
     516    return __builtin_loongson_psrah_u (s, amount);
     517  }
     518  
     519  __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
     520  psrah_s (int16x4_t s, uint8_t amount)
     521  {
     522    return __builtin_loongson_psrah_s (s, amount);
     523  }
     524  
     525  __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
     526  psraw_u (uint32x2_t s, uint8_t amount)
     527  {
     528    return __builtin_loongson_psraw_u (s, amount);
     529  }
     530  
     531  __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
     532  psraw_s (int32x2_t s, uint8_t amount)
     533  {
     534    return __builtin_loongson_psraw_s (s, amount);
     535  }
     536  
     537  /* Vector subtraction, treating overflow by wraparound.  */
     538  __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
     539  psubw_u (uint32x2_t s, uint32x2_t t)
     540  {
     541    return __builtin_loongson_psubw_u (s, t);
     542  }
     543  
     544  __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
     545  psubh_u (uint16x4_t s, uint16x4_t t)
     546  {
     547    return __builtin_loongson_psubh_u (s, t);
     548  }
     549  
     550  __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
     551  psubb_u (uint8x8_t s, uint8x8_t t)
     552  {
     553    return __builtin_loongson_psubb_u (s, t);
     554  }
     555  
     556  __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
     557  psubw_s (int32x2_t s, int32x2_t t)
     558  {
     559    return __builtin_loongson_psubw_s (s, t);
     560  }
     561  
     562  __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
     563  psubh_s (int16x4_t s, int16x4_t t)
     564  {
     565    return __builtin_loongson_psubh_s (s, t);
     566  }
     567  
     568  __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
     569  psubb_s (int8x8_t s, int8x8_t t)
     570  {
     571    return __builtin_loongson_psubb_s (s, t);
     572  }
     573  
     574  /* Subtraction of doubleword integers, treating overflow by wraparound.  */
     575  __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
     576  psubd_u (uint64_t s, uint64_t t)
     577  {
     578    return __builtin_loongson_psubd_u (s, t);
     579  }
     580  
     581  __extension__ static __inline int64_t __attribute__ ((__always_inline__))
     582  psubd_s (int64_t s, int64_t t)
     583  {
     584    return __builtin_loongson_psubd_s (s, t);
     585  }
     586  
     587  /* Vector subtraction, treating overflow by signed saturation.  */
     588  __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
     589  psubsh (int16x4_t s, int16x4_t t)
     590  {
     591    return __builtin_loongson_psubsh (s, t);
     592  }
     593  
     594  __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
     595  psubsb (int8x8_t s, int8x8_t t)
     596  {
     597    return __builtin_loongson_psubsb (s, t);
     598  }
     599  
     600  /* Vector subtraction, treating overflow by unsigned saturation.  */
     601  __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
     602  psubush (uint16x4_t s, uint16x4_t t)
     603  {
     604    return __builtin_loongson_psubush (s, t);
     605  }
     606  
     607  __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
     608  psubusb (uint8x8_t s, uint8x8_t t)
     609  {
     610    return __builtin_loongson_psubusb (s, t);
     611  }
     612  
     613  /* Unpack high data.  */
     614  __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
     615  punpckhwd_u (uint32x2_t s, uint32x2_t t)
     616  {
     617    return __builtin_loongson_punpckhwd_u (s, t);
     618  }
     619  
     620  __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
     621  punpckhhw_u (uint16x4_t s, uint16x4_t t)
     622  {
     623    return __builtin_loongson_punpckhhw_u (s, t);
     624  }
     625  
     626  __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
     627  punpckhbh_u (uint8x8_t s, uint8x8_t t)
     628  {
     629    return __builtin_loongson_punpckhbh_u (s, t);
     630  }
     631  
     632  __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
     633  punpckhwd_s (int32x2_t s, int32x2_t t)
     634  {
     635    return __builtin_loongson_punpckhwd_s (s, t);
     636  }
     637  
     638  __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
     639  punpckhhw_s (int16x4_t s, int16x4_t t)
     640  {
     641    return __builtin_loongson_punpckhhw_s (s, t);
     642  }
     643  
     644  __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
     645  punpckhbh_s (int8x8_t s, int8x8_t t)
     646  {
     647    return __builtin_loongson_punpckhbh_s (s, t);
     648  }
     649  
     650  /* Unpack low data.  */
     651  __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
     652  punpcklwd_u (uint32x2_t s, uint32x2_t t)
     653  {
     654    return __builtin_loongson_punpcklwd_u (s, t);
     655  }
     656  
     657  __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
     658  punpcklhw_u (uint16x4_t s, uint16x4_t t)
     659  {
     660    return __builtin_loongson_punpcklhw_u (s, t);
     661  }
     662  
     663  __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
     664  punpcklbh_u (uint8x8_t s, uint8x8_t t)
     665  {
     666    return __builtin_loongson_punpcklbh_u (s, t);
     667  }
     668  
     669  __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
     670  punpcklwd_s (int32x2_t s, int32x2_t t)
     671  {
     672    return __builtin_loongson_punpcklwd_s (s, t);
     673  }
     674  
     675  __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
     676  punpcklhw_s (int16x4_t s, int16x4_t t)
     677  {
     678    return __builtin_loongson_punpcklhw_s (s, t);
     679  }
     680  
     681  __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
     682  punpcklbh_s (int8x8_t s, int8x8_t t)
     683  {
     684    return __builtin_loongson_punpcklbh_s (s, t);
     685  }
     686  
     687  #ifdef __cplusplus
     688  }
     689  #endif
     690  
     691  #endif