(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
i386/
pr104188.c
       1  /* { dg-do run { target avx512f } } */
       2  /* { dg-require-effective-target sse2_runtime } */
       3  /* { dg-options "-O2 -msse2 -mfpmath=sse" } */
       4  
       5  #include <x86intrin.h>
       6  
       7  union U {
       8    float m[4][4];
       9    __m128 r[4];
      10    __m512 s;
      11  };
      12  
      13  __attribute__((noipa, target("avx512f")))
      14  void
      15  foo (union U *x, union U *a, union U *b)
      16  {
      17    __m512 c = _mm512_loadu_ps (&a->s);
      18    __m512 d = _mm512_broadcast_f32x4 (b->r[0]);
      19    __m512 e = _mm512_broadcast_f32x4 (b->r[1]);
      20    __m512 f = _mm512_broadcast_f32x4 (b->r[2]);
      21    __m512 g = _mm512_broadcast_f32x4 (b->r[3]);
      22    __m512 h = _mm512_mul_ps (_mm512_permute_ps (c, 0x00), d);
      23    h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0x55), e, h);
      24    h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0xaa), f, h);
      25    h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0xff), g, h);
      26    _mm512_storeu_ps (&x->s, h);
      27  }
      28  
      29  __attribute__((noipa, target("avx512f")))
      30  void
      31  do_test (void)
      32  {
      33    union U a = { .m = { { 1.0f, 2.0f, 3.0f, 4.0f },
      34  		       { 5.0f, 6.0f, 7.0f, 8.0f },
      35  		       { 9.0f, 10.0f, 11.0f, 12.0f },
      36  		       { 13.0f, 14.0f, 15.0f, 16.0f } } };
      37    union U b = { .m = { { 17.0f, 18.0f, 19.0f, 20.0f },
      38  		       { 21.0f, 22.0f, 23.0f, 24.0f },
      39  		       { 25.0f, 26.0f, 27.0f, 28.0f },
      40  		       { 29.0f, 30.0f, 31.0f, 32.0f } } };
      41    union U c;
      42    foo (&c, &a, &b);
      43    if (c.m[0][0] != 250.0f
      44        || c.m[0][1] != 260.0f
      45        || c.m[0][2] != 270.0f
      46        || c.m[0][3] != 280.0f)
      47      __builtin_abort ();
      48    if (c.m[1][0] != 618.0f
      49        || c.m[1][1] != 644.0f
      50        || c.m[1][2] != 670.0f
      51        || c.m[1][3] != 696.0f)
      52      __builtin_abort ();
      53    if (c.m[2][0] != 986.0f
      54        || c.m[2][1] != 1028.0f
      55        || c.m[2][2] != 1070.0f
      56        || c.m[2][3] != 1112.0f)
      57      __builtin_abort ();
      58    if (c.m[3][0] != 1354.0f
      59        || c.m[3][1] != 1412.0f
      60        || c.m[3][2] != 1470.0f
      61        || c.m[3][3] != 1528.0f)
      62      __builtin_abort ();
      63  }
      64  
      65  int
      66  main ()
      67  {
      68    if (__builtin_cpu_supports ("avx512f"))
      69      do_test ();
      70    return 0;
      71  }