(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
powerpc/
mma-builtin-1.c
       1  /* { dg-do compile } */
       2  /* { dg-require-effective-target power10_ok } */
       3  /* { dg-options "-Wno-psabi -mdejagnu-cpu=power10 -O2" } */
       4  
       5  typedef unsigned char  vec_t __attribute__((vector_size(16)));  /* 16-byte vector of unsigned char; the vector operand type passed to every MMA builtin in this test.  */
       6  
       7  void  /* Exercise __builtin_mma_xvi4ger8 and its "pp" form; the accumulator is stored to dst[0].  */
       8  foo0 (__vector_quad *dst, vec_t *vec)
       9  {
      10    __vector_quad acc;  /* No initializer: presumably the un-suffixed builtin below overwrites it without reading it.  */
      11    vec_t vec0 = vec[0];
      12    vec_t vec1 = vec[1];
      13  
      14    __builtin_mma_xvi4ger8 (&acc, vec0, vec1);
      15    __builtin_mma_xvi4ger8pp (&acc, vec0, vec1);  /* Operates on the same accumulator as the call above.  */
      16    dst[0] = acc;  /* Store the accumulator through dst.  */
      17  }
      18  
      19  void  /* Exercise __builtin_mma_xvi8ger4 and its "pp" and "spp" forms; the accumulator is stored to dst[1].  */
      20  foo1 (__vector_quad *dst, vec_t *vec)
      21  {
      22    __vector_quad acc;  /* No initializer: presumably the un-suffixed builtin below overwrites it without reading it.  */
      23    vec_t vec0 = vec[0];
      24    vec_t vec1 = vec[1];
      25  
      26    __builtin_mma_xvi8ger4 (&acc, vec0, vec1);
      27    __builtin_mma_xvi8ger4pp (&acc, vec0, vec1);  /* Operates on the same accumulator as the call above.  */
      28    __builtin_mma_xvi8ger4spp(&acc, vec0, vec1);  /* NOTE(review): "s" presumably selects the saturating variant -- confirm against the ISA.  */
      29    dst[1] = acc;  /* Store the accumulator through dst.  */
      30  }
      31  
      32  void  /* Exercise __builtin_mma_xvi16ger2 and its "pp" form; the accumulator is stored to dst[2].  */
      33  foo2 (__vector_quad *dst, vec_t *vec)
      34  {
      35    __vector_quad acc;  /* No initializer: presumably the un-suffixed builtin below overwrites it without reading it.  */
      36    vec_t vec0 = vec[0];
      37    vec_t vec1 = vec[1];
      38  
      39    __builtin_mma_xvi16ger2 (&acc, vec0, vec1);
      40    __builtin_mma_xvi16ger2pp (&acc, vec0, vec1);  /* Operates on the same accumulator as the call above.  */
      41    dst[2] = acc;  /* Store the accumulator through dst.  */
      42  }
      43  
      44  void  /* Exercise __builtin_mma_xvi16ger2s and __builtin_mma_xvi16ger2spp; the accumulator is stored to dst[3].  */
      45  foo3 (__vector_quad *dst, vec_t *vec)
      46  {
      47    __vector_quad acc;  /* No initializer: presumably the first builtin below overwrites it without reading it.  */
      48    vec_t vec0 = vec[0];
      49    vec_t vec1 = vec[1];
      50  
      51    __builtin_mma_xvi16ger2s (&acc, vec0, vec1);  /* NOTE(review): "s" presumably selects the saturating variant -- confirm against the ISA.  */
      52    __builtin_mma_xvi16ger2spp (&acc, vec0, vec1);  /* Operates on the same accumulator as the call above.  */
      53    dst[3] = acc;  /* Store the accumulator through dst.  */
      54  }
      55  
      56  void  /* Exercise __builtin_mma_xvf16ger2 and its "pp" and "pn" forms; the accumulator is stored to dst[4].  */
      57  foo4 (__vector_quad *dst, vec_t *vec)
      58  {
      59    __vector_quad acc;  /* No initializer: presumably the un-suffixed builtin below overwrites it without reading it.  */
      60    vec_t vec0 = vec[0];
      61    vec_t vec1 = vec[1];
      62  
      63    __builtin_mma_xvf16ger2 (&acc, vec0, vec1);
      64    __builtin_mma_xvf16ger2pp (&acc, vec0, vec1);  /* Operates on the same accumulator as the call above.  */
      65    __builtin_mma_xvf16ger2pn (&acc, vec0, vec1);
      66    dst[4] = acc;  /* Store the accumulator through dst.  */
      67  }
      68  
      69  void  /* Exercise the "np" and "nn" forms of __builtin_mma_xvf16ger2 on an accumulator loaded from src[0]; result is stored to dst[4].  */
      70  foo4b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
      71  {
      72    __vector_quad acc;
      73    vec_t vec0 = vec[0];
      74    vec_t vec1 = vec[1];
      75  
      76    acc = src[0];  /* Give the accumulator a defined value first; presumably the np/nn forms read it as an input.  */
      77    __builtin_mma_xvf16ger2np (&acc, vec0, vec1);
      78    __builtin_mma_xvf16ger2nn (&acc, vec0, vec1);
      79    dst[4] = acc;  /* Store the accumulator through dst.  */
      80  }
      81  
      82  void  /* Exercise __builtin_mma_xvbf16ger2 and its "pp" and "pn" forms; the accumulator is stored to dst[5].  */
      83  foo5 (__vector_quad *dst, vec_t *vec)
      84  {
      85    __vector_quad acc;  /* No initializer: presumably the un-suffixed builtin below overwrites it without reading it.  */
      86    vec_t vec0 = vec[0];
      87    vec_t vec1 = vec[1];
      88  
      89    __builtin_mma_xvbf16ger2 (&acc, vec0, vec1);
      90    __builtin_mma_xvbf16ger2pp (&acc, vec0, vec1);  /* Operates on the same accumulator as the call above.  */
      91    __builtin_mma_xvbf16ger2pn (&acc, vec0, vec1);
      92    dst[5] = acc;  /* Store the accumulator through dst.  */
      93  }
      94  
      95  void  /* Exercise the "np" and "nn" forms of __builtin_mma_xvbf16ger2 on an accumulator loaded from src[0]; result is stored to dst[5].  */
      96  foo5b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
      97  {
      98    __vector_quad acc;
      99    vec_t vec0 = vec[0];
     100    vec_t vec1 = vec[1];
     101  
     102    acc = src[0];  /* Give the accumulator a defined value first; presumably the np/nn forms read it as an input.  */
     103    __builtin_mma_xvbf16ger2np (&acc, vec0, vec1);
     104    __builtin_mma_xvbf16ger2nn (&acc, vec0, vec1);
     105    dst[5] = acc;  /* Store the accumulator through dst.  */
     106  }
     107  
     108  void  /* Exercise __builtin_mma_xvf32ger and its "pp" and "pn" forms; the accumulator is stored to dst[6].  */
     109  foo6 (__vector_quad *dst, vec_t *vec)
     110  {
     111    __vector_quad acc;  /* No initializer: presumably the un-suffixed builtin below overwrites it without reading it.  */
     112    vec_t vec0 = vec[0];
     113    vec_t vec1 = vec[1];
     114  
     115    __builtin_mma_xvf32ger (&acc, vec0, vec1);
     116    __builtin_mma_xvf32gerpp (&acc, vec0, vec1);  /* Operates on the same accumulator as the call above.  */
     117    __builtin_mma_xvf32gerpn (&acc, vec0, vec1);
     118    dst[6] = acc;  /* Store the accumulator through dst.  */
     119  }
     120  
     121  void  /* Exercise the "np" and "nn" forms of __builtin_mma_xvf32ger on an accumulator loaded from src[0]; result is stored to dst[6].  */
     122  foo6b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
     123  {
     124    __vector_quad acc;
     125    vec_t vec0 = vec[0];
     126    vec_t vec1 = vec[1];
     127  
     128    acc = src[0];  /* Give the accumulator a defined value first; presumably the np/nn forms read it as an input.  */
     129    __builtin_mma_xvf32gernp (&acc, vec0, vec1);
     130    __builtin_mma_xvf32gernn (&acc, vec0, vec1);
     131    dst[6] = acc;  /* Store the accumulator through dst.  */
     132  }
     133  
     134  void  /* Exercise __builtin_mma_pmxvi4ger8 and its "pp" form with constant mask arguments; the accumulator is stored to dst[7].  */
     135  foo7 (__vector_quad *dst, vec_t *vec)
     136  {
     137    __vector_quad acc;  /* No initializer: presumably the un-suffixed builtin below overwrites it without reading it.  */
     138    vec_t vec0 = vec[0];
     139    vec_t vec1 = vec[1];
     140  
     141    __builtin_mma_pmxvi4ger8 (&acc, vec0, vec1, 15, 15, 255);  /* NOTE(review): trailing constants are presumably the x, y and product masks with all bits set -- confirm field widths against the ISA.  */
     142    __builtin_mma_pmxvi4ger8pp (&acc, vec0, vec1, 15, 15, 255);  /* Same mask constants, same accumulator.  */
     143    dst[7] = acc;  /* Store the accumulator through dst.  */
     144  }
     145  
     146  void  /* Exercise __builtin_mma_pmxvi8ger4 and its "pp" and "spp" forms with constant mask arguments; the accumulator is stored to dst[8].  */
     147  foo8 (__vector_quad *dst, vec_t *vec)
     148  {
     149    __vector_quad acc;  /* No initializer: presumably the un-suffixed builtin below overwrites it without reading it.  */
     150    vec_t vec0 = vec[0];
     151    vec_t vec1 = vec[1];
     152  
     153    __builtin_mma_pmxvi8ger4 (&acc, vec0, vec1, 15, 15, 15);  /* NOTE(review): trailing constants are presumably the x, y and product masks with all bits set -- confirm field widths against the ISA.  */
     154    __builtin_mma_pmxvi8ger4pp (&acc, vec0, vec1, 15, 15, 15);  /* Same mask constants, same accumulator.  */
     155    __builtin_mma_pmxvi8ger4spp(&acc, vec0, vec1, 15, 15, 15);
     156    dst[8] = acc;  /* Store the accumulator through dst.  */
     157  }
     158  
     159  void  /* Exercise __builtin_mma_pmxvi16ger2 and its "pp" form with constant mask arguments; the accumulator is stored to dst[9].  */
     160  foo9 (__vector_quad *dst, vec_t *vec)
     161  {
     162    __vector_quad acc;  /* No initializer: presumably the un-suffixed builtin below overwrites it without reading it.  */
     163    vec_t vec0 = vec[0];
     164    vec_t vec1 = vec[1];
     165  
     166    __builtin_mma_pmxvi16ger2 (&acc, vec0, vec1, 15, 15, 3);  /* NOTE(review): trailing constants are presumably the x, y and product masks with all bits set -- confirm field widths against the ISA.  */
     167    __builtin_mma_pmxvi16ger2pp (&acc, vec0, vec1, 15, 15, 3);  /* Same mask constants, same accumulator.  */
     168    dst[9] = acc;  /* Store the accumulator through dst.  */
     169  }
     170  
     171  void  /* Exercise __builtin_mma_pmxvi16ger2s and __builtin_mma_pmxvi16ger2spp with constant mask arguments; the accumulator is stored to dst[10].  */
     172  foo10 (__vector_quad *dst, vec_t *vec)
     173  {
     174    __vector_quad acc;  /* No initializer: presumably the first builtin below overwrites it without reading it.  */
     175    vec_t vec0 = vec[0];
     176    vec_t vec1 = vec[1];
     177  
     178    __builtin_mma_pmxvi16ger2s (&acc, vec0, vec1, 15, 15, 3);  /* NOTE(review): trailing constants are presumably the x, y and product masks with all bits set -- confirm field widths against the ISA.  */
     179    __builtin_mma_pmxvi16ger2spp (&acc, vec0, vec1, 15, 15, 3);  /* Same mask constants, same accumulator.  */
     180    dst[10] = acc;  /* Store the accumulator through dst.  */
     181  }
     182  
     183  void  /* Exercise __builtin_mma_pmxvf16ger2 and its "pp" and "pn" forms with constant mask arguments; the accumulator is stored to dst[11].  */
     184  foo11 (__vector_quad *dst, vec_t *vec)
     185  {
     186    __vector_quad acc;  /* No initializer: presumably the un-suffixed builtin below overwrites it without reading it.  */
     187    vec_t vec0 = vec[0];
     188    vec_t vec1 = vec[1];
     189  
     190    __builtin_mma_pmxvf16ger2 (&acc, vec0, vec1, 15, 15, 3);  /* NOTE(review): trailing constants are presumably the x, y and product masks with all bits set -- confirm field widths against the ISA.  */
     191    __builtin_mma_pmxvf16ger2pp (&acc, vec0, vec1, 15, 15, 3);  /* Same mask constants, same accumulator.  */
     192    __builtin_mma_pmxvf16ger2pn (&acc, vec0, vec1, 15, 15, 3);
     193    dst[11] = acc;  /* Store the accumulator through dst.  */
     194  }
     195  
     196  void  /* Exercise the "np" and "nn" forms of __builtin_mma_pmxvf16ger2 on an accumulator loaded from src[0]; result is stored to dst[11].  */
     197  foo11b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
     198  {
     199    __vector_quad acc;
     200    vec_t vec0 = vec[0];
     201    vec_t vec1 = vec[1];
     202  
     203    acc = src[0];  /* Give the accumulator a defined value first; presumably the np/nn forms read it as an input.  */
     204    __builtin_mma_pmxvf16ger2np (&acc, vec0, vec1, 15, 15, 3);  /* NOTE(review): trailing constants are presumably the x, y and product masks with all bits set -- confirm against the ISA.  */
     205    __builtin_mma_pmxvf16ger2nn (&acc, vec0, vec1, 15, 15, 3);
     206    dst[11] = acc;  /* Store the accumulator through dst.  */
     207  }
     208  
     209  void  /* Exercise __builtin_mma_pmxvbf16ger2 and its "pp" and "pn" forms with constant mask arguments; the accumulator is stored to dst[12].  */
     210  foo12 (__vector_quad *dst, vec_t *vec)
     211  {
     212    __vector_quad acc;  /* No initializer: presumably the un-suffixed builtin below overwrites it without reading it.  */
     213    vec_t vec0 = vec[0];
     214    vec_t vec1 = vec[1];
     215  
     216    __builtin_mma_pmxvbf16ger2 (&acc, vec0, vec1, 15, 15, 3);  /* NOTE(review): trailing constants are presumably the x, y and product masks with all bits set -- confirm field widths against the ISA.  */
     217    __builtin_mma_pmxvbf16ger2pp (&acc, vec0, vec1, 15, 15, 3);  /* Same mask constants, same accumulator.  */
     218    __builtin_mma_pmxvbf16ger2pn (&acc, vec0, vec1, 15, 15, 3);
     219    dst[12] = acc;  /* Store the accumulator through dst.  */
     220  }
     221  
     222  void  /* Exercise the "np" and "nn" forms of __builtin_mma_pmxvbf16ger2 on an accumulator loaded from src[0]; result is stored to dst[12].  */
     223  foo12b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
     224  {
     225    __vector_quad acc;
     226    vec_t vec0 = vec[0];
     227    vec_t vec1 = vec[1];
     228  
     229    acc = src[0];  /* Give the accumulator a defined value first; presumably the np/nn forms read it as an input.  */
     230    __builtin_mma_pmxvbf16ger2np (&acc, vec0, vec1, 15, 15, 3);  /* NOTE(review): trailing constants are presumably the x, y and product masks with all bits set -- confirm against the ISA.  */
     231    __builtin_mma_pmxvbf16ger2nn (&acc, vec0, vec1, 15, 15, 3);
     232    dst[12] = acc;  /* Store the accumulator through dst.  */
     233  }
     234  
     235  void  /* Exercise __builtin_mma_pmxvf32ger and its "pp" and "pn" forms with constant mask arguments; the accumulator is stored to dst[13].  */
     236  foo13 (__vector_quad *dst, vec_t *vec)
     237  {
     238    __vector_quad acc;  /* No initializer: presumably the un-suffixed builtin below overwrites it without reading it.  */
     239    vec_t vec0 = vec[0];
     240    vec_t vec1 = vec[1];
     241  
     242    __builtin_mma_pmxvf32ger (&acc, vec0, vec1, 15, 15);  /* Only two trailing constants here, unlike the other pm* builtins in this file; presumably the x and y masks -- confirm against the ISA.  */
     243    __builtin_mma_pmxvf32gerpp (&acc, vec0, vec1, 15, 15);  /* Same mask constants, same accumulator.  */
     244    __builtin_mma_pmxvf32gerpn (&acc, vec0, vec1, 15, 15);
     245    dst[13] = acc;  /* Store the accumulator through dst.  */
     246  }
     247  
     248  void  /* Exercise the "np" and "nn" forms of __builtin_mma_pmxvf32ger on an accumulator loaded from src[0]; result is stored to dst[13].  */
     249  foo13b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
     250  {
     251    __vector_quad acc;
     252    vec_t vec0 = vec[0];
     253    vec_t vec1 = vec[1];
     254  
     255    acc = src[0];  /* Give the accumulator a defined value first; presumably the np/nn forms read it as an input.  */
     256    __builtin_mma_pmxvf32gernp (&acc, vec0, vec1, 15, 15);  /* Only two trailing constants; presumably the x and y masks -- confirm against the ISA.  */
     257    __builtin_mma_pmxvf32gernn (&acc, vec0, vec1, 15, 15);
     258    dst[13] = acc;  /* Store the accumulator through dst.  */
     259  }
     260  
     261  /* { dg-final { scan-assembler-times {\mlxv\M} 40 } } */
     262  /* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */
     263  /* { dg-final { scan-assembler-times {\mstxvp\M} 40 } } */
     264  /* { dg-final { scan-assembler-times {\mxxmfacc\M} 20 } } */
     265  /* { dg-final { scan-assembler-times {\mxxmtacc\M} 6 } } */
     266  /* { dg-final { scan-assembler-times {\mxvbf16ger2\M} 1 } } */
     267  /* { dg-final { scan-assembler-times {\mxvbf16ger2nn\M} 1 } } */
     268  /* { dg-final { scan-assembler-times {\mxvbf16ger2np\M} 1 } } */
     269  /* { dg-final { scan-assembler-times {\mxvbf16ger2pn\M} 1 } } */
     270  /* { dg-final { scan-assembler-times {\mxvbf16ger2pp\M} 1 } } */
     271  /* { dg-final { scan-assembler-times {\mxvf16ger2\M} 1 } } */
     272  /* { dg-final { scan-assembler-times {\mxvf16ger2nn\M} 1 } } */
     273  /* { dg-final { scan-assembler-times {\mxvf16ger2np\M} 1 } } */
     274  /* { dg-final { scan-assembler-times {\mxvf16ger2pn\M} 1 } } */
     275  /* { dg-final { scan-assembler-times {\mxvf16ger2pp\M} 1 } } */
     276  /* { dg-final { scan-assembler-times {\mxvf32ger\M} 1 } } */
     277  /* { dg-final { scan-assembler-times {\mxvf32gernn\M} 1 } } */
     278  /* { dg-final { scan-assembler-times {\mxvf32gernp\M} 1 } } */
     279  /* { dg-final { scan-assembler-times {\mxvf32gerpn\M} 1 } } */
     280  /* { dg-final { scan-assembler-times {\mxvf32gerpp\M} 1 } } */
     281  /* { dg-final { scan-assembler-times {\mxvi16ger2\M} 1 } } */
     282  /* { dg-final { scan-assembler-times {\mxvi16ger2pp\M} 1 } } */
     283  /* { dg-final { scan-assembler-times {\mxvi16ger2s\M} 1 } } */
     284  /* { dg-final { scan-assembler-times {\mxvi16ger2spp\M} 1 } } */
     285  /* { dg-final { scan-assembler-times {\mxvi4ger8\M} 1 } } */
     286  /* { dg-final { scan-assembler-times {\mxvi4ger8pp\M} 1 } } */
     287  /* { dg-final { scan-assembler-times {\mxvi8ger4\M} 1 } } */
     288  /* { dg-final { scan-assembler-times {\mxvi8ger4pp\M} 1 } } */
     289  /* { dg-final { scan-assembler-times {\mxvi8ger4spp\M} 1 } } */
     290  /* { dg-final { scan-assembler-times {\mpmxvbf16ger2\M} 1 } } */
     291  /* { dg-final { scan-assembler-times {\mpmxvbf16ger2nn\M} 1 } } */
     292  /* { dg-final { scan-assembler-times {\mpmxvbf16ger2np\M} 1 } } */
     293  /* { dg-final { scan-assembler-times {\mpmxvbf16ger2pn\M} 1 } } */
     294  /* { dg-final { scan-assembler-times {\mpmxvbf16ger2pp\M} 1 } } */
     295  /* { dg-final { scan-assembler-times {\mpmxvf16ger2\M} 1 } } */
     296  /* { dg-final { scan-assembler-times {\mpmxvf16ger2nn\M} 1 } } */
     297  /* { dg-final { scan-assembler-times {\mpmxvf16ger2np\M} 1 } } */
     298  /* { dg-final { scan-assembler-times {\mpmxvf16ger2pn\M} 1 } } */
     299  /* { dg-final { scan-assembler-times {\mpmxvf16ger2pp\M} 1 } } */
     300  /* { dg-final { scan-assembler-times {\mpmxvf32ger\M} 1 } } */
     301  /* { dg-final { scan-assembler-times {\mpmxvf32gernn\M} 1 } } */
     302  /* { dg-final { scan-assembler-times {\mpmxvf32gernp\M} 1 } } */
     303  /* { dg-final { scan-assembler-times {\mpmxvf32gerpn\M} 1 } } */
     304  /* { dg-final { scan-assembler-times {\mpmxvf32gerpp\M} 1 } } */
     305  /* { dg-final { scan-assembler-times {\mpmxvi16ger2\M} 1 } } */
     306  /* { dg-final { scan-assembler-times {\mpmxvi16ger2pp\M} 1 } } */
     307  /* { dg-final { scan-assembler-times {\mpmxvi16ger2s\M} 1 } } */
     308  /* { dg-final { scan-assembler-times {\mpmxvi16ger2spp\M} 1 } } */
     309  /* { dg-final { scan-assembler-times {\mpmxvi4ger8\M} 1 } } */
     310  /* { dg-final { scan-assembler-times {\mpmxvi4ger8pp\M} 1 } } */
     311  /* { dg-final { scan-assembler-times {\mpmxvi8ger4\M} 1 } } */
     312  /* { dg-final { scan-assembler-times {\mpmxvi8ger4pp\M} 1 } } */
     313  /* { dg-final { scan-assembler-times {\mpmxvi8ger4spp\M} 1 } } */