(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
i386/
pr23570.c
       1  /* { dg-do compile } */
       2  /* { dg-options "-O2 -msse2" } */
       3  
       4  typedef float __v4sf __attribute__ ((__vector_size__ (16)));
       5  typedef float __m128 __attribute__ ((__vector_size__ (16)));
       6  typedef long long __v2di __attribute__ ((__vector_size__ (16)));
       7  
       8  static __inline __m128
       9  _mm_cmpeq_ps (__m128 __A, __m128 __B)
      10  {
      11    return (__m128) __builtin_ia32_cmpeqps ((__v4sf)__A, (__v4sf)__B);
      12  }
      13  
      14  static __inline __m128
      15  _mm_setr_ps (float __Z, float __Y, float __X, float __W)
      16  {
      17    return __extension__ (__m128)(__v4sf){__Z, __Y, __X, __W };
      18  }
      19  
      20  static __inline __m128
      21  _mm_and_si128 (__m128 __A, __m128 __B)
      22  {
      23    return (__m128)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
      24  }
      25  
      26  static __inline __m128
      27  _mm_or_si128 (__m128 __A, __m128 __B)
      28  {
      29    return (__m128)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
      30  }
      31  
      32  typedef union
      33  {
      34    __m128 xmmi;
      35    int si[4];
      36  }
      37  __attribute__ ((aligned (16))) um128;
      38  
      39  um128 u;
      40  
      41  static inline int
      42  sse_max_abs_indexf (float *v, int step, int n)
      43  {
      44    __m128 m1, mm;
      45    __m128 mim, mi, msk;
      46    um128 u, ui;
      47    int n4, step2, step3;
      48    mm = __builtin_ia32_andps ((__m128) (__v4sf)
      49  			     { 0.0, v[step], v[step2], v[step3] }
      50  			     , u.xmmi);
      51    if (n4)
      52      {
      53        int i;
      54        for (i = 0; i < n4; ++i);
      55        msk = (__m128) _mm_cmpeq_ps (m1, mm);
      56        mim = _mm_or_si128 (_mm_and_si128 (msk, mi), mim);
      57      }
      58    ui.xmmi = (__m128) mim;
      59    return ui.si[n];
      60  }
      61  
      62  static void
      63  sse_swap_rowf (float *r1, float *r2, int n)
      64  {
      65    int n4 = (n / 4) * 4;
      66    float *r14end = r1 + n4;
      67    while (r1 < r14end)
      68      {
      69        *r1 = *r2;
      70        r1++;
      71      }
      72  }
      73  
      74  void swap_index (int *, int, int);
      75  void sse_add_rowf (float *, float *, float, int);
      76  
      77  void
      78  ludcompf (float *m, int nw, int *prow, int n)
      79  {
      80    int i, s = 0;
      81    float *pm;
      82    for (i = 0, pm = m; i < n - 1; ++i, pm += nw)
      83      {
      84        int vi = sse_max_abs_indexf (pm + i, nw, n - i);
      85        float *pt;
      86        int j;
      87        if (vi != 0)
      88  	{
      89  	  sse_swap_rowf (pm, pm + vi * nw, nw);
      90  	  swap_index (prow, i, i + vi);
      91  	}
      92        for (j = i + 1, pt = pm + nw; j < n; ++j, pt += nw)
      93  	sse_add_rowf (pt + i + 1, pm + i + 1, -1.0, n - i - 1);
      94      }
      95  }