(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.dg/
vect/
fast-math-vect-call-1.c
       1  /* Disabling epilogues until we find a better way to deal with scans.  */
       2  /* { dg-additional-options "--param vect-epilogues-nomask=0" } */
       3  #include "tree-vect.h"
       4  
       /* Local prototypes for the three float math routines and for abort, all
          of which are called below.  The visible part of this file includes
          only "tree-vect.h", not <math.h> or <stdlib.h>.  */
       5  extern float copysignf (float, float);
       6  extern float sqrtf (float);
       7  extern float fabsf (float);
       8  extern void abort (void);
       /* Shared 64-element work arrays: a is the output written by f1..f4;
          b, c and d are the inputs, filled in by main1.  */
       9  float a[64], b[64], c[64], d[64];
      10  
      /* f1: for each i in [0, n) store a group of four consecutive elements
         a[4*i] .. a[4*i+3].  Lane k of the group receives
           copysignf (b[4*i+k], c[4*i+k]) + (k + 1).0f + sqrtf (d[4*i+k]).
         No bounds check is done here: the caller must keep 4*n <= 64 (main1
         calls f1 (16), which covers exactly the whole arrays).
         NOTE(review): noinline/noclone presumably keep this loop in its own
         function so the dg-final scans at the end of the file see it
         separately -- confirm against the testsuite conventions.  */
      11  __attribute__((noinline, noclone)) void
      12  f1 (int n)
      13  {
      14    int i;
      15    for (i = 0; i < n; i++)
      16      {
      17        a[4 * i + 0] = copysignf (b[4 * i + 0], c[4 * i + 0]) + 1.0f + sqrtf (d[4 * i + 0]);
      18        a[4 * i + 1] = copysignf (b[4 * i + 1], c[4 * i + 1]) + 2.0f + sqrtf (d[4 * i + 1]);
      19        a[4 * i + 2] = copysignf (b[4 * i + 2], c[4 * i + 2]) + 3.0f + sqrtf (d[4 * i + 2]);
      20        a[4 * i + 3] = copysignf (b[4 * i + 3], c[4 * i + 3]) + 4.0f + sqrtf (d[4 * i + 3]);
      21      }
      22  }
      23  
      /* f2: same computation as a two-element group.  The loop runs 2*n
         iterations and each iteration stores a[2*i] and a[2*i+1], so 4*n
         elements are written in total.  Lane k receives
           copysignf (b[2*i+k], c[2*i+k]) + (k + 1).0f + sqrtf (d[2*i+k]).
         No bounds check is done here: the caller must keep 4*n <= 64 (main1
         calls f2 (16)).  */
      24  __attribute__((noinline, noclone)) void
      25  f2 (int n)
      26  {
      27    int i;
      28    for (i = 0; i < 2 * n; i++)
      29      {
      30        a[2 * i + 0] = copysignf (b[2 * i + 0], c[2 * i + 0]) + 1.0f + sqrtf (d[2 * i + 0]);
      31        a[2 * i + 1] = copysignf (b[2 * i + 1], c[2 * i + 1]) + 2.0f + sqrtf (d[2 * i + 1]);
      32      }
      33  }
      34  
      /* f3: unit-stride variant with a compile-time trip count.  Every one of
         the 64 elements gets
           a[i] = copysignf (b[i], c[i]) + 1.0f + sqrtf (d[i]).
         Unlike f1, f2 and f4 there is one store per iteration and no
         multi-element group.  */
      35  __attribute__((noinline, noclone)) void
      36  f3 (void)
      37  {
      38    int i;
      39    for (i = 0; i < 64; i++)
      40      a[i] = copysignf (b[i], c[i]) + 1.0f + sqrtf (d[i]);
      41  }
      42  
      /* f4: three-element group.  The loop runs 2*n iterations and each
         iteration stores a[3*i], a[3*i+1] and a[3*i+2], so 6*n elements are
         written in total.  Lane k receives
           copysignf (b[3*i+k], c[3*i+k]) + (k + 1).0f + sqrtf (d[3*i+k]).
         No bounds check is done here: the caller must keep 6*n <= 64 (main1
         calls f4 (10), writing a[0] .. a[59]).
         NOTE(review): the SLP scan at the end of the file is additionally
         gated on vect_perm3_int; presumably that is because of this group of
         three -- confirm.  */
      43  __attribute__((noinline, noclone)) void
      44  f4 (int n)
      45  {
      46    int i;
      47    for (i = 0; i < 2 * n; i++)
      48      {
      49        a[3 * i + 0] = copysignf (b[3 * i + 0], c[3 * i + 0]) + 1.0f + sqrtf (d[3 * i + 0]);
      50        a[3 * i + 1] = copysignf (b[3 * i + 1], c[3 * i + 1]) + 2.0f + sqrtf (d[3 * i + 1]);
      51        a[3 * i + 2] = copysignf (b[3 * i + 2], c[3 * i + 2]) + 3.0f + sqrtf (d[3 * i + 2]);
      52      }
      53  }
      54  
      /* main1: fill the inputs, run f1..f4 in turn and verify a[] after each.
         Returns 0 when every check passes; calls abort () on the first
         element that is off by 0.0001 or more.

         With the inputs set up below, copysignf (b[i], c[i]) is 4*i carrying
         the sign selected by (i & 2), and sqrtf (d[i]) is i.  Each check
         therefore expects
           ((i & 2) ? -4 * i : 4 * i) + 1 + lane + i
         where lane is the element's position inside its group: i & 3 for f1,
         i & 1 for f2, nothing extra for f3, and i % 3 for f4.  */
      55  __attribute__((noinline, noclone)) int
      56  main1 ()
      57  {
      58    int i;
      59  
            /* Input setup: b alternates sign on odd i, c changes sign with bit 1
               of i, and d holds perfect squares.
               NOTE(review): the empty asm is presumably there to keep this
               setup loop from being vectorized, which would disturb the
               "vectorized 1 loops" count scanned below -- confirm.  */
      60    for (i = 0; i < 64; i++)
      61      {
      62        asm ("");
      63        b[i] = (i & 1) ? -4 * i : 4 * i;
      64        c[i] = (i & 2) ? -8 * i : 8 * i;
      65        d[i] = i * i;
      66      }
      67    f1 (16);
            /* After a successful compare, a[i] is overwritten with 131.25.  Every
               expected value is an integer, so an element the next kernel fails
               to rewrite cannot pass the next check.  */
      68    for (i = 0; i < 64; i++)
      69      if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + (i & 3) + i - a[i]) >= 0.0001f)
      70        abort ();
      71      else
      72        a[i] = 131.25;
      73    f2 (16);
      74    for (i = 0; i < 64; i++)
      75      if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + (i & 1) + i - a[i]) >= 0.0001f)
      76        abort ();
      77      else
      78        a[i] = 131.25;
      79    f3 ();
      80    for (i = 0; i < 64; i++)
      81      if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + i - a[i]) >= 0.0001f)
      82        abort ();
      83      else
      84        a[i] = 131.25;
      85    f4 (10);
            /* f4 (10) writes only a[0] .. a[59], so only those 60 are checked.  */
      86    for (i = 0; i < 60; i++)
      87      if (fabsf (((i & 2) ? -4 * i : 4 * i) + 1 + (i % 3) + i - a[i]) >= 0.0001f)
      88        abort ();
      89    return 0;
      90  }
      91  
      /* Entry point: call check_vect () first, then return main1 ()'s result
         as the exit status.  check_vect is not defined in this file.
         NOTE(review): presumably it comes from "tree-vect.h" and bails out
         when the host lacks the required vector support -- confirm.  */
      92  int
      93  main ()
      94  {
      95    check_vect ();
      96    return main1 ();
      97  }
      98  
      99  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" { target { vect_call_copysignf && vect_call_sqrtf } } } } */
     100  /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { target { { vect_call_copysignf && vect_call_sqrtf } && vect_perm3_int } } } } */