/* { dg-do compile } */
/* { dg-options "-mavx512fp16 -O2" } */

       4  typedef __bf16 v8bf __attribute__ ((__vector_size__ (16)));
       5  typedef __bf16 v16bf __attribute__ ((__vector_size__ (32)));
       6  typedef __bf16 v32bf __attribute__ ((__vector_size__ (64)));
       7  
       8  #define VEC_EXTRACT(V,S,IDX)			\
       9    S						\
      10    __attribute__((noipa))			\
      11    vec_extract_##V##_##IDX (V v)			\
      12    {						\
      13      return v[IDX];				\
      14    }
      15  
      16  #define VEC_SET(V,S,IDX)			\
      17    V						\
      18    __attribute__((noipa))			\
      19    vec_set_##V##_##IDX (V v, S s)		\
      20    {						\
      21      v[IDX] = s;				\
      22      return v;					\
      23    }
      24  
      25  v8bf
      26  vec_init_v8bf (__bf16 a1, __bf16 a2, __bf16 a3, __bf16 a4,
      27  	       __bf16 a5,  __bf16 a6, __bf16 a7, __bf16 a8)
      28  {
      29      return __extension__ (v8bf) {a1, a2, a3, a4, a5, a6, a7, a8};
      30  }
      31  
      32  v16bf
      33  vec_init_v16bf (__bf16 a1, __bf16 a2, __bf16 a3, __bf16 a4,
      34  	       __bf16 a5,  __bf16 a6, __bf16 a7, __bf16 a8,
      35  	       __bf16 a9,  __bf16 a10, __bf16 a11, __bf16 a12,
      36  	       __bf16 a13,  __bf16 a14, __bf16 a15, __bf16 a16)
      37  {
      38      return __extension__ (v16bf) {a1, a2, a3, a4, a5, a6, a7, a8,
      39  				  a9, a10, a11, a12, a13, a14, a15, a16};
      40  }
      41  
      42  v32bf
      43  vec_init_v32bf (__bf16 a1, __bf16 a2, __bf16 a3, __bf16 a4,
      44  		__bf16 a5, __bf16 a6, __bf16 a7, __bf16 a8,
      45  		__bf16 a9, __bf16 a10, __bf16 a11, __bf16 a12,
      46  		__bf16 a13, __bf16 a14, __bf16 a15, __bf16 a16,
      47  		__bf16 a17, __bf16 a18, __bf16 a19, __bf16 a20,
      48  		__bf16 a21, __bf16 a22, __bf16 a23, __bf16 a24,
      49  		__bf16 a25, __bf16 a26, __bf16 a27, __bf16 a28,
      50  		__bf16 a29, __bf16 a30, __bf16 a31, __bf16 a32)
      51  {
      52      return __extension__ (v32bf) {a1, a2, a3, a4, a5, a6, a7, a8,
      53  				  a9, a10, a11, a12, a13, a14, a15, a16,
      54  				  a17, a18, a19, a20, a21, a22, a23, a24,
      55  				  a25, a26, a27, a28, a29, a30, a31, a32};
      56  }
      57  
      58  v8bf
      59  vec_init_dup_v8bf (__bf16 a1)
      60  {
      61      return __extension__ (v8bf) {a1, a1, a1, a1, a1, a1, a1, a1};
      62  }
      63  
      64  v16bf
      65  vec_init_dup_v16bf (__bf16 a1)
      66  {
      67      return __extension__ (v16bf) {a1, a1, a1, a1, a1, a1, a1, a1,
      68  				  a1, a1, a1, a1, a1, a1, a1, a1};
      69  }
      70  
      71  v32bf
      72  vec_init_dup_v32bf (__bf16 a1)
      73  {
      74      return __extension__ (v32bf) {a1, a1, a1, a1, a1, a1, a1, a1,
      75  				  a1, a1, a1, a1, a1, a1, a1, a1,
      76  				  a1, a1, a1, a1, a1, a1, a1, a1,
      77  				  a1, a1, a1, a1, a1, a1, a1, a1};
      78  }
      79  
/* { dg-final { scan-assembler-times "vpunpcklwd" 28 } } */
/* { dg-final { scan-assembler-times "vpunpckldq" 14 } } */
/* { dg-final { scan-assembler-times "vpunpcklqdq" 7 } } */

      84  VEC_EXTRACT (v8bf, __bf16, 0);
      85  VEC_EXTRACT (v8bf, __bf16, 4);
      86  VEC_EXTRACT (v16bf, __bf16, 0);
      87  VEC_EXTRACT (v16bf, __bf16, 3);
      88  VEC_EXTRACT (v16bf, __bf16, 8);
      89  VEC_EXTRACT (v16bf, __bf16, 15);
      90  VEC_EXTRACT (v32bf, __bf16, 0);
      91  VEC_EXTRACT (v32bf, __bf16, 5);
      92  VEC_EXTRACT (v32bf, __bf16, 8);
      93  VEC_EXTRACT (v32bf, __bf16, 14);
      94  VEC_EXTRACT (v32bf, __bf16, 16);
      95  VEC_EXTRACT (v32bf, __bf16, 24);
      96  VEC_EXTRACT (v32bf, __bf16, 28);
/* { dg-final { scan-assembler-times "vpsrldq\[\t ]*\\\$8" 2 } } */
/* { dg-final { scan-assembler-times "vpsrldq\[\t ]*\\\$6" 1 } } */
/* { dg-final { scan-assembler-times "vpsrldq\[\t ]*\\\$14" 1 } } */
/* { dg-final { scan-assembler-times "vpsrldq\[\t ]*\\\$10" 1 } } */
/* { dg-final { scan-assembler-times "vpsrldq\[\t ]*\\\$12" 1 } } */
/* { dg-final { scan-assembler-times "vextract" 9 } } */

     104  VEC_SET (v8bf, __bf16, 4);
     105  VEC_SET (v16bf, __bf16, 3);
     106  VEC_SET (v16bf, __bf16, 8);
     107  VEC_SET (v16bf, __bf16, 15);
     108  VEC_SET (v32bf, __bf16, 5);
     109  VEC_SET (v32bf, __bf16, 8);
     110  VEC_SET (v32bf, __bf16, 14);
     111  VEC_SET (v32bf, __bf16, 16);
     112  VEC_SET (v32bf, __bf16, 24);
     113  VEC_SET (v32bf, __bf16, 28);
/* { dg-final { scan-assembler-times "vpbroadcastw" 13 { target { ! ia32 } } } } */
/* { dg-final { scan-assembler-times "vpblendw" 4 { target { ! ia32 } } } } */

/* { dg-final { scan-assembler-times "vpbroadcastw" 12 { target ia32 } } } */
/* { dg-final { scan-assembler-times "vpblendw" 3 { target ia32 } } } */
/* { dg-final { scan-assembler-times "vpinsrw" 1 { target ia32 } } } */

/* { dg-final { scan-assembler-times "vpblendd" 3 } } */