1  /* { dg-do compile } */
       2  /* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
       3  
       4  #include "riscv_vector.h"
       5  
       6  void f0 (int16_t *base,int8_t *out,size_t vl, size_t shift)
       7  {
       8      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
       9      vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
      10      __riscv_vse8_v_u8mf8 (out,v,vl);
      11  }
      12  
      13  void f1 (int16_t *base,int8_t *out,size_t vl, size_t shift)
      14  {
      15      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
      16      vuint8mf8_t src2 = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 100), vl);
      17      vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8_tu(src2,src,shift,vl);
      18      __riscv_vse8_v_u8mf8 (out,v,vl);
      19  }
      20  
      21  void f2 (int16_t *base,int8_t *out,size_t vl, size_t shift)
      22  {
      23      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
      24      vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
      25      vuint16mf4_t v2 = __riscv_vadd_vv_u16mf4 (src, src,vl);
      26      __riscv_vse8_v_u8mf8 (out,v,vl);
      27      __riscv_vse16_v_u16mf4 ((int16_t *)out,v2,vl);
      28  }
      29  
      30  void f3 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
      31  {
      32      for (int i = 0; i < n; i++){
      33        vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
      34        vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
      35        vuint16mf4_t v2 = __riscv_vadd_vv_u16mf4 (src, src,vl);
      36        __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
      37        __riscv_vse16_v_u16mf4 ((int16_t *)(out + 200*i),v2,vl);
      38      }
      39  }
      40  
      41  void f4 (int16_t *base,int8_t *out,size_t vl, size_t shift)
      42  {
      43      vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base, vl);
      44      vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8(src,shift,vl);
      45      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
      46      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
      47      vuint16mf4_t v2 = __riscv_vadd_vv_u16mf4 (src, src,vl);
      48      __riscv_vse8_v_u8mf8 (out,v,vl);
      49      __riscv_vse16_v_u16mf4 ((int16_t *)out,v2,vl);
      50  }
      51  
/* Masked narrowing, then a TU narrowing and a TU load all merging into
   the same dest register inside a loop; SRC is loop-invariant and must
   survive every iteration.  Pointers are void* (GNU void* arithmetic),
   so the intrinsic pointer types convert implicitly.  */
void f5 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
{
    vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100, vl);
    for (int i = 0; i < n; i++){
      /* Fresh mask each iteration keeps v0 busy.  */
      vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
      vuint8mf8_t v = __riscv_vnclipu_wx_u8mf8_m(m,src,shift,vl);
      v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
      v = __riscv_vle8_v_u8mf8_tu (v, base2, vl);
      __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
    }
}
      63  
      64  void f6 (int16_t *base,int8_t *out,size_t vl, size_t shift)
      65  {
      66      vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
      67      vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
      68      __riscv_vse8_v_u8m1 (out,v,vl);
      69  }
      70  
      71  void f7 (int16_t *base,int8_t *out,size_t vl, size_t shift)
      72  {
      73      vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
      74      vuint8m1_t src2 = __riscv_vle8_v_u8m1 ((int8_t *)(base + 100), vl);
      75      vuint8m1_t v = __riscv_vnclipu_wx_u8m1_tu(src2,src,shift,vl);
      76      __riscv_vse8_v_u8m1 (out,v,vl);
      77  }
      78  
      79  void f8 (int16_t *base,int8_t *out,size_t vl, size_t shift)
      80  {
      81      vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
      82      vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
      83      vuint16m2_t v2 = __riscv_vadd_vv_u16m2 (src, src,vl);
      84      __riscv_vse8_v_u8m1 (out,v,vl);
      85      __riscv_vse16_v_u16m2 ((int16_t *)out,v2,vl);
      86  }
      87  
      88  void f9 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
      89  {
      90      for (int i = 0; i < n; i++){
      91        vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
      92        vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
      93        vuint16m2_t v2 = __riscv_vadd_vv_u16m2 (src, src,vl);
      94        __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
      95        __riscv_vse16_v_u16m2 ((int16_t *)(out + 200*i),v2,vl);
      96      }
      97  }
      98  
      99  void f10 (int16_t *base,int8_t *out,size_t vl, size_t shift)
     100  {
     101      vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
     102      vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
     103      v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
     104      v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
     105      vuint16m2_t v2 = __riscv_vadd_vv_u16m2 (src, src,vl);
     106      __riscv_vse8_v_u8m1 (out,v,vl);
     107      __riscv_vse16_v_u16m2 ((int16_t *)out,v2,vl);
     108  }
     109  
/* LMUL=2 variant of f5: masked narrowing + TU narrowing + TU load all
   merging into one dest while the loop-invariant wide SRC stays live.
   void* pointers convert implicitly to the intrinsic pointer types.  */
void f11 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
{
    vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100, vl);
    for (int i = 0; i < n; i++){
      vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
      vuint8m1_t v = __riscv_vnclipu_wx_u8m1_m(m,src,shift,vl);
      v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
      v = __riscv_vle8_v_u8m1_tu (v, base2, vl);
      __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
    }
}
     121  
     122  void f12 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
     123  {
     124      vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 1000), vl);
     125      for (int i = 0; i < n; i++){
     126        vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
     127        v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
     128        v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
     129        v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
     130        v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
     131        v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
     132        v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
     133        __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
     134      }
     135  }
     136  
     137  void f13 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
     138  {
     139      vuint8m1_t v = __riscv_vle8_v_u8m1 ((int8_t *)(base + 1000), vl);
     140      for (int i = 0; i < n; i++){
     141        vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
     142        v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
     143        v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
     144        v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
     145        v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
     146        v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
     147        v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
     148        __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
     149      }
     150  }
     151  
     152  void f14 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
     153  {
     154      for (int i = 0; i < n; i++){
     155        vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 1000 * i), vl);
     156        vuint16mf4_t src = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
     157        v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
     158        v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
     159        v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
     160        v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
     161        v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
     162        v = __riscv_vnclipu_wx_u8mf8_tu(v,src,shift,vl);
     163        __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
     164      }
     165  }
     166  
     167  void f15 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
     168  {
     169      for (int i = 0; i < n; i++){
     170        vuint8m1_t v = __riscv_vle8_v_u8m1 ((int8_t *)(base + 1000 * i), vl);
     171        vuint16m2_t src = __riscv_vle16_v_u16m2 (base + 100*i, vl);
     172        v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
     173        v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
     174        v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
     175        v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
     176        v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
     177        v = __riscv_vnclipu_wx_u8m1_tu(v,src,shift,vl);
     178        __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
     179      }
     180  }
     181  
     182  void f16 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
     183  {
     184      for (int i = 0; i < n; i++){
     185        vuint8mf8_t v = __riscv_vle8_v_u8mf8 ((int8_t *)(base + 1000 * i), vl);
     186        vuint16mf4_t src1 = __riscv_vle16_v_u16mf4 (base + 100*i, vl);
     187        vuint16mf4_t src2 = __riscv_vle16_v_u16mf4 (base + 200*i, vl);
     188        vuint16mf4_t src3 = __riscv_vle16_v_u16mf4 (base + 300*i, vl);
     189        vuint16mf4_t src4 = __riscv_vle16_v_u16mf4 (base + 400*i, vl);
     190        vuint16mf4_t src5 = __riscv_vle16_v_u16mf4 (base + 500*i, vl);
     191        vuint16mf4_t src6 = __riscv_vle16_v_u16mf4 (base + 600*i, vl);
     192        v = __riscv_vnclipu_wx_u8mf8_tu(v,src1,shift,vl);
     193        v = __riscv_vnclipu_wx_u8mf8_tu(v,src2,shift,vl);
     194        v = __riscv_vnclipu_wx_u8mf8_tu(v,src3,shift,vl);
     195        v = __riscv_vnclipu_wx_u8mf8_tu(v,src4,shift,vl);
     196        v = __riscv_vnclipu_wx_u8mf8_tu(v,src5,shift,vl);
     197        v = __riscv_vnclipu_wx_u8mf8_tu(v,src6,shift,vl);
     198        __riscv_vse8_v_u8mf8 (out + 100*i,v,vl);
     199      }
     200  }
     201  
     202  void f17 (int16_t *base,int8_t *out,size_t vl, int n, size_t shift)
     203  {
     204      for (int i = 0; i < n; i++){
     205        vuint8m1_t v = __riscv_vle8_v_u8m1 ((int8_t *)(base + 1000 * i), vl);
     206        vuint16m2_t src1 = __riscv_vle16_v_u16m2 (base + 100*i, vl);
     207        vuint16m2_t src2 = __riscv_vle16_v_u16m2 (base + 200*i, vl);
     208        vuint16m2_t src3 = __riscv_vle16_v_u16m2 (base + 300*i, vl);
     209        vuint16m2_t src4 = __riscv_vle16_v_u16m2 (base + 400*i, vl);
     210        vuint16m2_t src5 = __riscv_vle16_v_u16m2 (base + 500*i, vl);
     211        vuint16m2_t src6 = __riscv_vle16_v_u16m2 (base + 600*i, vl);
     212        v = __riscv_vnclipu_wx_u8m1_tu(v,src1,shift,vl);
     213        v = __riscv_vnclipu_wx_u8m1_tu(v,src2,shift,vl);
     214        v = __riscv_vnclipu_wx_u8m1_tu(v,src3,shift,vl);
     215        v = __riscv_vnclipu_wx_u8m1_tu(v,src4,shift,vl);
     216        v = __riscv_vnclipu_wx_u8m1_tu(v,src5,shift,vl);
     217        v = __riscv_vnclipu_wx_u8m1_tu(v,src6,shift,vl);
     218        __riscv_vse8_v_u8m1 (out + 100*i,v,vl);
     219      }
     220  }
     221  
/* Two chained masked narrowings (u32->u16, then u16->u8) with a TU
   load in between; the same mask register feeds both narrowings.
   Fractional-LMUL (mf2/mf4/mf8) counterpart of f19.  */
void f18 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
{
    vuint32mf2_t src = __riscv_vle32_v_u32mf2 (base + 100, vl);
    for (int i = 0; i < n; i++){
      vbool64_t m = __riscv_vlm_v_b64 (base + i, vl);
      vuint16mf4_t v = __riscv_vnclipu_wx_u16mf4_m(m,src,shift,vl);
      vuint16mf4_t v2 = __riscv_vle16_v_u16mf4_tu (v, base2 + i, vl);
      vuint8mf8_t v3 = __riscv_vnclipu_wx_u8mf8_m(m,v2,shift,vl);
      __riscv_vse8_v_u8mf8 (out + 100*i,v3,vl);
    }
}
     233  
/* Whole-register (m4/m2/m1) counterpart of f18, plus a tumu narrowing
   whose merge operand (v3) and source (v2) are both still live and
   stored afterwards.  */
void f19 (void *base,void *base2,void *out,size_t vl, int n, size_t shift)
{
    vuint32m4_t src = __riscv_vle32_v_u32m4 (base + 100, vl);
    for (int i = 0; i < n; i++){
      vbool8_t m = __riscv_vlm_v_b8 (base + i, vl);
      vuint16m2_t v = __riscv_vnclipu_wx_u16m2_m(m,src,shift,vl);
      vuint16m2_t v2 = __riscv_vle16_v_u16m2_tu (v, base2 + i, vl);
      vuint8m1_t v3 = __riscv_vnclipu_wx_u8m1_m(m,v2,shift,vl);
      vuint8m1_t v4 = __riscv_vnclipu_wx_u8m1_tumu(m,v3,v2,shift,vl);
      /* Both v3 and v4 are stored, so neither may be clobbered.  */
      __riscv_vse8_v_u8m1 (out + 100*i,v3,vl);
      __riscv_vse8_v_u8m1 (out + 222*i,v4,vl);
    }
}
     247  
/* Clobber every vector register except v30/v31 so the m2 load lands in
   v30-v31, then allow only v30 for the narrowing result: the vnclipu
   must be emitted with DEST overlapping SRC, with no spill, vmv or
   csrr (enforced by the dg-final scans).  */
void f20 (int16_t *base,int8_t *out,size_t vl, size_t shift)
{
    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
    /* Only allow load v30,v31.  */
    asm volatile("#" ::
		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
		   "v26", "v27", "v28", "v29");

    vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
    /* Only allow vncvt SRC == DEST v30.  */
    asm volatile("#" ::                                                        
		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     
		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     
		   "v26", "v27", "v28", "v29", "v31");

    __riscv_vse8_v_u8m1 (out,v,vl);
}
     268  
/* m1 -> mf2 case: only v31 is left free for both the load and the
   narrowing, so the vnclipu must reuse v31 as SRC and DEST.  */
void f21 (int16_t *base,int8_t *out,size_t vl, size_t shift)
{
    vuint16m1_t src = __riscv_vle16_v_u16m1 (base, vl);
    /* Only allow load v31.  */
    asm volatile("#" ::
		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
		   "v26", "v27", "v28", "v29", "v30");

    vuint8mf2_t v = __riscv_vnclipu_wx_u8mf2(src,shift,vl);
    /* Only allow vncvt SRC == DEST v31.  */
    asm volatile("#" ::
		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
		   "v26", "v27", "v28", "v29", "v30");

    __riscv_vse8_v_u8mf2 (out,v,vl);
}
     289  
/* Like f20, but after the narrowing only v29 remains free: the narrow
   result must live in v29 through the following vadd with no extra
   moves.  */
void f22 (int16_t *base,int8_t *out,size_t vl, size_t shift)
{
    vuint16m2_t src = __riscv_vle16_v_u16m2 (base, vl);
    /* Only allow load v30,v31.  */
    asm volatile("#" ::
		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 
		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",  
		   "v26", "v27", "v28", "v29");

    vuint8m1_t v = __riscv_vnclipu_wx_u8m1(src,shift,vl);
    /* Only allow v29.  */
    asm volatile("#" ::                                                        
		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     
		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     
		   "v26", "v27", "v28", "v30", "v31");
    v = __riscv_vadd_vv_u8m1 (v,v,vl);
    /* Only allow v29.  */
    asm volatile("#" ::                                                        
		 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 
		   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     
		   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     
		   "v26", "v27", "v28", "v30", "v31");

    __riscv_vse8_v_u8m1 (out,v,vl);
}
     317  
     318  /* { dg-final { scan-assembler-not {vmv} } } */
     319  /* { dg-final { scan-assembler-not {csrr} } } */