1  /* { dg-do assemble { target aarch64_asm_sve_ok } } */
       2  /* { dg-options "-O -msve-vector-bits=1024 --save-temps" } */
       3  
       4  #include <stdint.h>
       5  
       6  typedef int64_t vnx8di __attribute__((vector_size (128)));
       7  typedef int32_t vnx16si __attribute__((vector_size (128)));
       8  typedef int16_t vnx32hi __attribute__((vector_size (128)));
       9  typedef int8_t vnx64qi __attribute__((vector_size (128)));
      10  typedef double vnx8df __attribute__((vector_size (128)));
      11  typedef float vnx16sf __attribute__((vector_size (128)));
      12  typedef _Float16 vnx32hf __attribute__((vector_size (128)));
      13  
      14  #define EXTRACT(ELT_TYPE, TYPE, INDEX)		\
      15    ELT_TYPE permute_##TYPE##_##INDEX (void)	\
      16    {						\
      17      TYPE values;				\
      18      asm ("" : "=w" (values));			\
      19      return values[INDEX];			\
      20    }
      21  
      22  #define TEST_ALL(T)				\
      23    T (int64_t, vnx8di, 0)			\
      24    T (int64_t, vnx8di, 1)			\
      25    T (int64_t, vnx8di, 2)			\
      26    T (int64_t, vnx8di, 7)			\
      27    T (int64_t, vnx8di, 8)			\
      28    T (int64_t, vnx8di, 9)			\
      29    T (int64_t, vnx8di, 15)			\
      30    T (int32_t, vnx16si, 0)			\
      31    T (int32_t, vnx16si, 1)			\
      32    T (int32_t, vnx16si, 3)			\
      33    T (int32_t, vnx16si, 4)			\
      34    T (int32_t, vnx16si, 15)			\
      35    T (int32_t, vnx16si, 16)			\
      36    T (int32_t, vnx16si, 21)			\
      37    T (int32_t, vnx16si, 31)			\
      38    T (int16_t, vnx32hi, 0)			\
      39    T (int16_t, vnx32hi, 1)			\
      40    T (int16_t, vnx32hi, 7)			\
      41    T (int16_t, vnx32hi, 8)			\
      42    T (int16_t, vnx32hi, 31)			\
      43    T (int16_t, vnx32hi, 32)			\
      44    T (int16_t, vnx32hi, 47)			\
      45    T (int16_t, vnx32hi, 63)			\
      46    T (int8_t, vnx64qi, 0)			\
      47    T (int8_t, vnx64qi, 1)			\
      48    T (int8_t, vnx64qi, 15)			\
      49    T (int8_t, vnx64qi, 16)			\
      50    T (int8_t, vnx64qi, 63)			\
      51    T (int8_t, vnx64qi, 64)			\
      52    T (int8_t, vnx64qi, 100)			\
      53    T (int8_t, vnx64qi, 127)			\
      54    T (double, vnx8df, 0)				\
      55    T (double, vnx8df, 1)				\
      56    T (double, vnx8df, 2)				\
      57    T (double, vnx8df, 7)				\
      58    T (double, vnx8df, 8)				\
      59    T (double, vnx8df, 9)				\
      60    T (double, vnx8df, 15)			\
      61    T (float, vnx16sf, 0)				\
      62    T (float, vnx16sf, 1)				\
      63    T (float, vnx16sf, 3)				\
      64    T (float, vnx16sf, 4)				\
      65    T (float, vnx16sf, 15)			\
      66    T (float, vnx16sf, 16)			\
      67    T (float, vnx16sf, 21)			\
      68    T (float, vnx16sf, 31)			\
      69    T (_Float16, vnx32hf, 0)			\
      70    T (_Float16, vnx32hf, 1)			\
      71    T (_Float16, vnx32hf, 7)			\
      72    T (_Float16, vnx32hf, 8)			\
      73    T (_Float16, vnx32hf, 31)			\
      74    T (_Float16, vnx32hf, 32)			\
      75    T (_Float16, vnx32hf, 47)			\
      76    T (_Float16, vnx32hf, 63)
      77  
      78  TEST_ALL (EXTRACT)
      79  
/* { dg-final { scan-assembler-times {\tfmov\tx[0-9]+, d[0-9]+\n} 5 { target aarch64_little_endian } } } */
      81  /* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[0\]\n} 1 { target aarch64_big_endian } } } */
      82  /* { dg-final { scan-assembler-times {\tumov\tx[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */
      83  /* { dg-final { scan-assembler-not {\tdup\td[0-9]+, v[0-9]+\.d\[0\]\n} } } */
      84  /* { dg-final { scan-assembler-times {\tdup\td[0-9]+, v[0-9]+\.d\[1\]\n} 1 } } */
      85  /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[2\]\n} 2 } } */
      86  /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.d, z[0-9]+\.d\[7\]\n} 2 } } */
      87  /* { dg-final { scan-assembler-times {\tlastb\tx[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
      88  /* { dg-final { scan-assembler-times {\tlastb\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
      89  
/* { dg-final { scan-assembler-times {\tfmov\tw[0-9]+, s[0-9]+\n} 5 { target aarch64_little_endian } } } */
      91  /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[0\]\n} 1 { target aarch64_big_endian } } } */
      92  /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */
      93  /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */
      94  /* { dg-final { scan-assembler-not {\tdup\ts[0-9]+, v[0-9]+\.s\[0\]\n} } } */
      95  /* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[1\]\n} 1 } } */
      96  /* { dg-final { scan-assembler-times {\tdup\ts[0-9]+, v[0-9]+\.s\[3\]\n} 1 } } */
      97  /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[4\]\n} 2 } } */
      98  /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.s, z[0-9]+\.s\[15\]\n} 2 } } */
      99  /* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
     100  /* { dg-final { scan-assembler-times {\tlastb\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
     101  
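/* UMOV from lane 0 is used for the index-0 extract and also to move the
   result of each non-Advanced SIMD (SVE DUP/EXT) extract into a general
   register, hence the count of 5.  */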
     103  /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[0\]\n} 5 } } */
     104  /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */
     105  /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */
     106  /* { dg-final { scan-assembler-not {\tdup\th[0-9]+, v[0-9]+\.h\[0\]\n} } } */
     107  /* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[1\]\n} 1 } } */
     108  /* { dg-final { scan-assembler-times {\tdup\th[0-9]+, v[0-9]+\.h\[7\]\n} 1 } } */
     109  /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[8\]\n} 2 } } */
     110  /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.h, z[0-9]+\.h\[31\]\n} 2 } } */
     111  /* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
     112  /* { dg-final { scan-assembler-times {\tlastb\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
     113  
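/* UMOV from lane 0 is used for the index-0 extract and also to move the
   result of each non-Advanced SIMD (SVE DUP/EXT) extract into a general
   register, hence the count of 5.  */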
     115  /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[0\]\n} 5 } } */
     116  /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[1\]\n} 1 } } */
     117  /* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.b\[15\]\n} 1 } } */
     118  /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[16\]\n} 1 } } */
     119  /* { dg-final { scan-assembler-times {\tdup\tz[0-9]+\.b, z[0-9]+\.b\[63\]\n} 1 } } */
     120  /* { dg-final { scan-assembler-times {\tlastb\tw[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
     121  
     122  /* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #64\n} 7 } } */
     123  /* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #72\n} 2 } } */
     124  /* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #84\n} 2 } } */
     125  /* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #94\n} 2 } } */
     126  /* { dg-final { scan-assembler-times {\text\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b, #100\n} 1 } } */