1  /* { dg-do compile } */
       2  /* { dg-options "-march=rv32gcv -mabi=ilp32 -O3 -fno-tree-vectorize" } */
       3  
       4  #include "riscv_vector.h"
       5  
       /* Declared here and called between the copy loops of the test functions
          in this file; no definition is visible in this file.  */
       6  void fn3 (void);
       7  
       /* e8/mf8 case.  Executes five identical copy loops; each iteration loads
          one vuint8mf8_t through a cast of in + 900 + i and stores it through
          the same cast of out + 900 + i, for i in [0, n * n * n * n).  fn3 ()
          is called between consecutive loops (four calls in total).
          The scan-assembler-times checks at the end of this file expect five
          "vsetvli ..., zero, e8, mf8" instructions, each followed by a
          single-digit .L label and a vle8.v.
          NOTE(review): presumably the fn3 () calls are what make every loop
          need its own vsetvli -- confirm against the vsetvl pass.  The loop
          shape is left untouched because those checks depend on the emitted
          assembly.  */
       8  void f (int8_t * restrict in, int8_t * restrict out, int n)
       9  {
      10    for (int i = 0 ; i < n * n * n * n; i++) {
      11      vuint8mf8_t v = *(vuint8mf8_t*)(in + 900 + i);  /* in is int8_t *: the address steps one element per i.  */
      12      *(vuint8mf8_t*)(out + 900 + i) = v;
      13    }
      14    fn3 ();  /* External call; no definition is visible in this file.  */
      15    
      16    for (int i = 0 ; i < n * n * n * n; i++) {
      17      vuint8mf8_t v = *(vuint8mf8_t*)(in + 900 + i);
      18      *(vuint8mf8_t*)(out + 900 + i) = v;
      19    }
      20    fn3 ();
      21    
      22    for (int i = 0 ; i < n * n * n * n; i++) {
      23      vuint8mf8_t v = *(vuint8mf8_t*)(in + 900 + i);
      24      *(vuint8mf8_t*)(out + 900 + i) = v;
      25    }
      26    fn3 ();
      27    for (int i = 0 ; i < n * n * n * n; i++) {
      28      vuint8mf8_t v = *(vuint8mf8_t*)(in + 900 + i);
      29      *(vuint8mf8_t*)(out + 900 + i) = v;
      30    }
      31    fn3 ();
      32    for (int i = 0 ; i < n * n * n * n; i++) {
      33      vuint8mf8_t v = *(vuint8mf8_t*)(in + 900 + i);
      34      *(vuint8mf8_t*)(out + 900 + i) = v;
      35    }
      36  
      37  }
      38  
      /* e8/mf4 case.  Executes five identical copy loops; each iteration loads
         one vuint8mf4_t through a cast of in + 900 + i and stores it through
         the same cast of out + 900 + i, for i in [0, n * n * n * n).  fn3 ()
         is called between consecutive loops (four calls in total).
         The scan-assembler-times checks at the end of this file expect five
         "vsetvli ..., zero, e8, mf4" instructions, each followed by a
         two-digit .L label and a vle8.v.
         NOTE(review): presumably the fn3 () calls are what make every loop
         need its own vsetvli -- confirm against the vsetvl pass.  The loop
         shape is left untouched because those checks depend on the emitted
         assembly.  */
      39  void f2 (int8_t * restrict in, int8_t * restrict out, int n)
      40  {
      41    for (int i = 0 ; i < n * n * n * n; i++) {
      42      vuint8mf4_t v = *(vuint8mf4_t*)(in + 900 + i);  /* in is int8_t *: the address steps one element per i.  */
      43      *(vuint8mf4_t*)(out + 900 + i) = v;
      44    }
      45    fn3 ();  /* External call; no definition is visible in this file.  */
      46    
      47    for (int i = 0 ; i < n * n * n * n; i++) {
      48      vuint8mf4_t v = *(vuint8mf4_t*)(in + 900 + i);
      49      *(vuint8mf4_t*)(out + 900 + i) = v;
      50    }
      51    fn3 ();
      52    
      53    for (int i = 0 ; i < n * n * n * n; i++) {
      54      vuint8mf4_t v = *(vuint8mf4_t*)(in + 900 + i);
      55      *(vuint8mf4_t*)(out + 900 + i) = v;
      56    }
      57    fn3 ();
      58    for (int i = 0 ; i < n * n * n * n; i++) {
      59      vuint8mf4_t v = *(vuint8mf4_t*)(in + 900 + i);
      60      *(vuint8mf4_t*)(out + 900 + i) = v;
      61    }
      62    fn3 ();
      63    for (int i = 0 ; i < n * n * n * n; i++) {
      64      vuint8mf4_t v = *(vuint8mf4_t*)(in + 900 + i);
      65      *(vuint8mf4_t*)(out + 900 + i) = v;
      66    }
      67  
      68  }
      69  
      /* e8/mf2 case.  Executes five identical copy loops; each iteration loads
         one vuint8mf2_t through a cast of in + 900 + i and stores it through
         the same cast of out + 900 + i, for i in [0, n * n * n * n).  fn3 ()
         is called between consecutive loops (four calls in total).
         The scan-assembler-times checks at the end of this file expect five
         "vsetvli ..., zero, e8, mf2" instructions, each followed by a
         two-digit .L label and a vle8.v.
         NOTE(review): presumably the fn3 () calls are what make every loop
         need its own vsetvli -- confirm against the vsetvl pass.  The loop
         shape is left untouched because those checks depend on the emitted
         assembly.  */
      70  void f3 (int8_t * restrict in, int8_t * restrict out, int n)
      71  {
      72    for (int i = 0 ; i < n * n * n * n; i++) {
      73      vuint8mf2_t v = *(vuint8mf2_t*)(in + 900 + i);  /* in is int8_t *: the address steps one element per i.  */
      74      *(vuint8mf2_t*)(out + 900 + i) = v;
      75    }
      76    fn3 ();  /* External call; no definition is visible in this file.  */
      77    
      78    for (int i = 0 ; i < n * n * n * n; i++) {
      79      vuint8mf2_t v = *(vuint8mf2_t*)(in + 900 + i);
      80      *(vuint8mf2_t*)(out + 900 + i) = v;
      81    }
      82    fn3 ();
      83    
      84    for (int i = 0 ; i < n * n * n * n; i++) {
      85      vuint8mf2_t v = *(vuint8mf2_t*)(in + 900 + i);
      86      *(vuint8mf2_t*)(out + 900 + i) = v;
      87    }
      88    fn3 ();
      89    for (int i = 0 ; i < n * n * n * n; i++) {
      90      vuint8mf2_t v = *(vuint8mf2_t*)(in + 900 + i);
      91      *(vuint8mf2_t*)(out + 900 + i) = v;
      92    }
      93    fn3 ();
      94    for (int i = 0 ; i < n * n * n * n; i++) {
      95      vuint8mf2_t v = *(vuint8mf2_t*)(in + 900 + i);
      96      *(vuint8mf2_t*)(out + 900 + i) = v;
      97    }
      98  
      99  }
     100  
     /* e16/mf4 case.  Executes five identical copy loops; each iteration loads
        one vuint16mf4_t through a cast of in + 900 + i and stores it through
        the same cast of out + 900 + i, for i in [0, n * n * n * n).  fn3 ()
        is called between consecutive loops (four calls in total).
        in and out stay int8_t *, so the 16-bit-element vector accesses are
        made purely through the pointer casts.
        The scan-assembler-times checks at the end of this file expect five
        "vsetvli ..., zero, e16, mf4" instructions, each followed by a
        two-digit .L label and a vle16.v.
        NOTE(review): presumably the fn3 () calls are what make every loop
        need its own vsetvli -- confirm against the vsetvl pass.  The loop
        shape is left untouched because those checks depend on the emitted
        assembly.  */
     101  void f4 (int8_t * restrict in, int8_t * restrict out, int n)
     102  {
     103    for (int i = 0 ; i < n * n * n * n; i++) {
     104      vuint16mf4_t v = *(vuint16mf4_t*)(in + 900 + i);  /* in is int8_t *: the address steps one int8_t per i.  */
     105      *(vuint16mf4_t*)(out + 900 + i) = v;
     106    }
     107    fn3 ();  /* External call; no definition is visible in this file.  */
     108    
     109    for (int i = 0 ; i < n * n * n * n; i++) {
     110      vuint16mf4_t v = *(vuint16mf4_t*)(in + 900 + i);
     111      *(vuint16mf4_t*)(out + 900 + i) = v;
     112    }
     113    fn3 ();
     114    
     115    for (int i = 0 ; i < n * n * n * n; i++) {
     116      vuint16mf4_t v = *(vuint16mf4_t*)(in + 900 + i);
     117      *(vuint16mf4_t*)(out + 900 + i) = v;
     118    }
     119    fn3 ();
     120    for (int i = 0 ; i < n * n * n * n; i++) {
     121      vuint16mf4_t v = *(vuint16mf4_t*)(in + 900 + i);
     122      *(vuint16mf4_t*)(out + 900 + i) = v;
     123    }
     124    fn3 ();
     125    for (int i = 0 ; i < n * n * n * n; i++) {
     126      vuint16mf4_t v = *(vuint16mf4_t*)(in + 900 + i);
     127      *(vuint16mf4_t*)(out + 900 + i) = v;
     128    }
     129  
     130  }
     131  
     /* e16/mf2 case.  Executes five identical copy loops; each iteration loads
        one vuint16mf2_t through a cast of in + 900 + i and stores it through
        the same cast of out + 900 + i, for i in [0, n * n * n * n).  fn3 ()
        is called between consecutive loops (four calls in total).
        in and out stay int8_t *, so the 16-bit-element vector accesses are
        made purely through the pointer casts.
        The scan-assembler-times checks at the end of this file expect five
        "vsetvli ..., zero, e16, mf2" instructions, each followed by a
        two-digit .L label and a vle16.v.
        NOTE(review): presumably the fn3 () calls are what make every loop
        need its own vsetvli -- confirm against the vsetvl pass.  The loop
        shape is left untouched because those checks depend on the emitted
        assembly.  */
     132  void f5 (int8_t * restrict in, int8_t * restrict out, int n)
     133  {
     134    for (int i = 0 ; i < n * n * n * n; i++) {
     135      vuint16mf2_t v = *(vuint16mf2_t*)(in + 900 + i);  /* in is int8_t *: the address steps one int8_t per i.  */
     136      *(vuint16mf2_t*)(out + 900 + i) = v;
     137    }
     138    fn3 ();  /* External call; no definition is visible in this file.  */
     139    
     140    for (int i = 0 ; i < n * n * n * n; i++) {
     141      vuint16mf2_t v = *(vuint16mf2_t*)(in + 900 + i);
     142      *(vuint16mf2_t*)(out + 900 + i) = v;
     143    }
     144    fn3 ();
     145    
     146    for (int i = 0 ; i < n * n * n * n; i++) {
     147      vuint16mf2_t v = *(vuint16mf2_t*)(in + 900 + i);
     148      *(vuint16mf2_t*)(out + 900 + i) = v;
     149    }
     150    fn3 ();
     151    for (int i = 0 ; i < n * n * n * n; i++) {
     152      vuint16mf2_t v = *(vuint16mf2_t*)(in + 900 + i);
     153      *(vuint16mf2_t*)(out + 900 + i) = v;
     154    }
     155    fn3 ();
     156    for (int i = 0 ; i < n * n * n * n; i++) {
     157      vuint16mf2_t v = *(vuint16mf2_t*)(in + 900 + i);
     158      *(vuint16mf2_t*)(out + 900 + i) = v;
     159    }
     160  
     161  }
     162  
     /* e32/mf2 case.  Executes five identical copy loops; each iteration loads
        one vuint32mf2_t through a cast of in + 900 + i and stores it through
        the same cast of out + 900 + i, for i in [0, n * n * n * n).  fn3 ()
        is called between consecutive loops (four calls in total).
        in and out stay int8_t *, so the 32-bit-element vector accesses are
        made purely through the pointer casts.
        The scan-assembler-times checks at the end of this file expect five
        "vsetvli ..., zero, e32, mf2" instructions, each followed by a
        two-digit .L label and a vle32.v.
        NOTE(review): presumably the fn3 () calls are what make every loop
        need its own vsetvli -- confirm against the vsetvl pass.  The loop
        shape is left untouched because those checks depend on the emitted
        assembly.  */
     163  void f6 (int8_t * restrict in, int8_t * restrict out, int n)
     164  {
     165    for (int i = 0 ; i < n * n * n * n; i++) {
     166      vuint32mf2_t v = *(vuint32mf2_t*)(in + 900 + i);  /* in is int8_t *: the address steps one int8_t per i.  */
     167      *(vuint32mf2_t*)(out + 900 + i) = v;
     168    }
     169    fn3 ();  /* External call; no definition is visible in this file.  */
     170    
     171    for (int i = 0 ; i < n * n * n * n; i++) {
     172      vuint32mf2_t v = *(vuint32mf2_t*)(in + 900 + i);
     173      *(vuint32mf2_t*)(out + 900 + i) = v;
     174    }
     175    fn3 ();
     176    
     177    for (int i = 0 ; i < n * n * n * n; i++) {
     178      vuint32mf2_t v = *(vuint32mf2_t*)(in + 900 + i);
     179      *(vuint32mf2_t*)(out + 900 + i) = v;
     180    }
     181    fn3 ();
     182    for (int i = 0 ; i < n * n * n * n; i++) {
     183      vuint32mf2_t v = *(vuint32mf2_t*)(in + 900 + i);
     184      *(vuint32mf2_t*)(out + 900 + i) = v;
     185    }
     186    fn3 ();
     187    for (int i = 0 ; i < n * n * n * n; i++) {
     188      vuint32mf2_t v = *(vuint32mf2_t*)(in + 900 + i);
     189      *(vuint32mf2_t*)(out + 900 + i) = v;
     190    }
     191  
     192  }
     193  
     194  /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 5 { target { no-opts "-O0"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
     195  /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]\s+\.L[0-9]\:\s+vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)} 5 { target { no-opts "-O0"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
     196  /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au]} 5 { target { no-opts "-O0"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
     197  /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au]\s+\.L[0-9][0-9]\:\s+vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)} 5 { target { no-opts "-O0"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
     198  /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 5 { target { no-opts "-O0"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
     199  /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au]\s+\.L[0-9][0-9]\:\s+vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)} 5 { target { no-opts "-O0"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
     200  
     201  /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e16,\s*mf4,\s*t[au],\s*m[au]} 5 { target { no-opts "-O0"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
     202  /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e16,\s*mf4,\s*t[au],\s*m[au]\s+\.L[0-9][0-9]\:\s+vle16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)} 5 { target { no-opts "-O0"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
     203  /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e16,\s*mf2,\s*t[au],\s*m[au]} 5 { target { no-opts "-O0"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
     204  /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e16,\s*mf2,\s*t[au],\s*m[au]\s+\.L[0-9][0-9]\:\s+vle16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)} 5 { target { no-opts "-O0"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
     205  
     206  /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au]} 5 { target { no-opts "-O0"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
     207  /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au]\s+\.L[0-9][0-9]\:\s+vle32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)} 5 { target { no-opts "-O0"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */