1  /* { dg-do compile } */
       2  /* { dg-require-effective-target sse3 } */
       3  /* { dg-options "-O3 -msse3" } */
       4  
       5  float a[1024], b[1024];  /* a is updated in place from b by foo; 1024 = 256 iterations x 4 elements.  */
       6  
       7  void foo()
       8  {
            /* Each iteration handles one group of four consecutive floats:
               offsets 0 and 2 subtract b from a, offsets 1 and 3 add b to a.
               256 iterations x 4 elements cover all 1024 entries of a and b.
               This alternating subtract/add layout is what the dg-final
               directives in this file check for (an "addsubps" instruction
               and no "shuf" in the assembly), so the statement pattern
               should be left exactly as written.  */
       9    for (int i = 0; i < 256; i++)
      10      {
      11        a[4*i+0] = a[4*i+0] - b[4*i+0];
      12        a[4*i+1] = a[4*i+1] + b[4*i+1];
      13        a[4*i+2] = a[4*i+2] - b[4*i+2];
      14        a[4*i+3] = a[4*i+3] + b[4*i+3];
      15      }
      16  }
      17  
      18  /* We should be able to vectorize this with SLP using the addsub
      19     SLP pattern.  */
      20  /* { dg-final { scan-assembler "addsubps" } } */
      21  /* { dg-final { scan-assembler-not "shuf" } } */