(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
i386/
pr92645-4.c
       1  /* { dg-do compile } */
       2  /* { dg-options "-O2 -mavx2 -fdump-tree-optimized -Wno-psabi -mno-avx512f" } */
       3  
       4  typedef unsigned int u32v4 __attribute__((vector_size(16)));  /* 16-byte vector of unsigned int.  */
       5  typedef unsigned short u16v16 __attribute__((vector_size(32)));  /* 32-byte vector of unsigned short.  */
       6  typedef unsigned char u8v16 __attribute__((vector_size(16)));  /* 16-byte vector of unsigned char (16 lanes).  */
       7  
       8  union vec128 {
            /* Two overlapping views of the same 16 bytes, used below to fill
               the storage through one view and read it through the other.  */
       9    u8v16 u8;  /* Viewed as unsigned char lanes.  */
      10    u32v4 u32;  /* Viewed as unsigned int lanes.  */
      11  };
      12  
      13  #define memcpy __builtin_memcpy  /* No header is included in the lines shown; map memcpy to the GCC builtin.  */
      14  
      15  static u16v16 zxt(u8v16 x)
      16  {
            /* Widen X lane by lane: element i of the result is x[i] converted
               from unsigned char to unsigned short, i.e. zero-extended.
               The conversion is deliberately spelled as an explicit 16-element
               constructor.  NOTE(review): presumably this element list is the
               pattern the dg-final scans at the end of the file are about, so
               do not replace it with a builtin conversion -- confirm.  */
      17    return (u16v16) {
      18      x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7],
      19      x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15]
      20    };
      21  }
      22  
      23  static u8v16 narrow(u16v16 x)
      24  {
            /* Narrow X lane by lane: element i of the result is x[i] converted
               from unsigned short to unsigned char, which keeps only the low
               8 bits of each lane.  Written as an explicit 16-element
               constructor, mirroring zxt.  */
      25    return (u8v16) {
      26      x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7],
      27      x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15]
      28    };
      29  }
      30  
      31  void f(char *dst, char *src, unsigned long n, unsigned c)
      32  {
            /* Test kernel.  For every 16-byte chunk read from SRC it computes,
               per byte lane, (src_byte * ia + ((c_byte << 8) + 128)) >> 8 in
               unsigned short lanes, truncates back to bytes and stores the
               chunk to DST.  c_byte comes from the bytes of C repeated across
               the vector and ia is derived from the top bits of C.
               Each iteration advances SRC and DST by 16 bytes and subtracts 4
               from N; the loop only stops when N reaches exactly 0, so N must
               be a multiple of 4 (N == 0 does nothing).
               NOTE(review): this looks like a constant-colour blend over
               4-byte pixels with N counting pixels -- confirm against the
               original bug report before relying on that reading.  */

            /* Start from 255 minus the bits of C above bit 23 (the top byte
               when unsigned is 32 bits wide, as on this target).  */
      33    unsigned ia = 255 - (c >> 24);
            /* With ia in 0..255, ia >> 7 is 1 exactly when ia >= 128, so this
               bumps the upper half of the range by one (128..255 -> 129..256).  */
      34    ia += ia >> 7;
      35  
      36    union vec128 c4 = {0}, ia16 = {0};
            /* Vector += scalar adds the scalar to every lane; starting from
               zero this leaves C in each unsigned int lane of c4.  */
      37    c4.u32 += c;
            /* Same broadcast for the byte lanes.  The cast keeps only the low
               8 bits of ia, so ia == 256 is stored as 0.  */
      38    ia16.u8 += (unsigned char)ia;
      39  
            /* Loop-invariant addend: the bytes of c4 widened to unsigned short,
               moved to the high byte of each lane, plus 128.
               NOTE(review): 128 is presumably the rounding bias for the >> 8
               in the loop -- confirm.  */
      40    u16v16 c16 = (zxt(c4.u8) << 8) + 128;
      41  
      42    for (; n; src += 16, dst += 16, n -= 4) {
      43      union vec128 s;
              /* Load one 16-byte chunk; memcpy is the builtin (see the macro
                 near the top of the file).  */
      44      memcpy(&s, src, sizeof s);
              /* Widen both operands, multiply-add in unsigned short lanes, take
                 the high byte of each lane with >> 8, then narrow to bytes.  */
      45      s.u8 = narrow((zxt(s.u8)*zxt(ia16.u8) + c16) >> 8);
      46      memcpy(dst, &s, sizeof s);
      47    }
      48  }
      49  
      50  /* { dg-final { scan-tree-dump-times "\\(vector\\(16\\) short unsigned int\\)" 3 "optimized" } } */
      51  /* We're missing an opportunity to, after later optimizations, combine
      52     a uniform CTOR with a vector promotion to a CTOR on a promoted
      53     element.  */
      54  /* { dg-final { scan-tree-dump-times "\\(vector\\(16\\) short unsigned int\\)" 2 "optimized" { xfail *-*-* } } } */
      55  /* { dg-final { scan-tree-dump-times "VEC_PACK_TRUNC" 1 "optimized" } } */
      56  /* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 2 "optimized" } } */