1  #include <arm_neon.h>
       2  #include "arm-neon-ref.h"
       3  #include "compute-ref-data.h"
       4  
       5  /* Expected results of the first test round in exec_vqshl_n, one table per
             vector variant (element type, element width, lane count).  They are
             compared by the CHECK calls that follow the first series of
             TEST_VQSHL_N invocations.  Every unsigned entry is the all-ones value
             of its element width.  */
       6  VECT_VAR_DECL(expected,int,8,8) [] = { 0xc0, 0xc4, 0xc8, 0xcc,
       7  				       0xd0, 0xd4, 0xd8, 0xdc };
       8  VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6 };
       9  VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffe0, 0xffffffe2 };
      10  VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffc0 };
      11  VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
      12  					0xff, 0xff, 0xff, 0xff };
      13  VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
      14  VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff };
      15  VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff };
      16  VECT_VAR_DECL(expected,int,8,16) [] = { 0xc0, 0xc4, 0xc8, 0xcc,
      17  					0xd0, 0xd4, 0xd8, 0xdc,
      18  					0xe0, 0xe4, 0xe8, 0xec,
      19  					0xf0, 0xf4, 0xf8, 0xfc };
      20  VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe0, 0xffe2, 0xffe4, 0xffe6,
      21  					0xffe8, 0xffea, 0xffec, 0xffee };
      22  VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe0, 0xffffffe2,
      23  					0xffffffe4, 0xffffffe6 };
      24  VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffc0, 0xffffffffffffffc4 };
      25  VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
      26  					 0xff, 0xff, 0xff, 0xff,
      27  					 0xff, 0xff, 0xff, 0xff,
      28  					 0xff, 0xff, 0xff, 0xff };
      29  VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
      30  					 0xffff, 0xffff, 0xffff, 0xffff };
      31  VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff,
      32  					 0xffffffff, 0xffffffff };
      33  VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff,
      34  					 0xffffffffffffffff };
      35  
      36  /* Expected results with max positive input.  Each entry equals the value
             that exec_vqshl_n writes into every input lane with VDUP before the
             second test round (0x7f.. for signed, 0xff.. for unsigned variants),
             i.e. the result is expected to remain at that maximum.  These tables
             are compared by the second series of CHECK calls.  */
      37  VECT_VAR_DECL(expected_max,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
      38  					   0x7f, 0x7f, 0x7f, 0x7f };
      39  VECT_VAR_DECL(expected_max,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
      40  VECT_VAR_DECL(expected_max,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
      41  VECT_VAR_DECL(expected_max,int,64,1) [] = { 0x7fffffffffffffff };
      42  VECT_VAR_DECL(expected_max,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
      43  					    0xff, 0xff, 0xff, 0xff };
      44  VECT_VAR_DECL(expected_max,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
      45  VECT_VAR_DECL(expected_max,uint,32,2) [] = { 0xffffffff, 0xffffffff };
      46  VECT_VAR_DECL(expected_max,uint,64,1) [] = { 0xffffffffffffffff };
      47  VECT_VAR_DECL(expected_max,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
      48  					    0x7f, 0x7f, 0x7f, 0x7f,
      49  					    0x7f, 0x7f, 0x7f, 0x7f,
      50  					    0x7f, 0x7f, 0x7f, 0x7f };
      51  VECT_VAR_DECL(expected_max,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
      52  					    0x7fff, 0x7fff, 0x7fff, 0x7fff };
      53  VECT_VAR_DECL(expected_max,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
      54  					    0x7fffffff, 0x7fffffff };
      55  VECT_VAR_DECL(expected_max,int,64,2) [] = { 0x7fffffffffffffff,
      56  					    0x7fffffffffffffff };
      57  VECT_VAR_DECL(expected_max,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
      58  					     0xff, 0xff, 0xff, 0xff,
      59  					     0xff, 0xff, 0xff, 0xff,
      60  					     0xff, 0xff, 0xff, 0xff };
      61  VECT_VAR_DECL(expected_max,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
      62  					     0xffff, 0xffff, 0xffff, 0xffff };
      63  VECT_VAR_DECL(expected_max,uint,32,4) [] = { 0xffffffff, 0xffffffff,
      64  					     0xffffffff, 0xffffffff };
      65  VECT_VAR_DECL(expected_max,uint,64,2) [] = { 0xffffffffffffffff,
      66  					     0xffffffffffffffff };
      67  
      68  #define INSN vqshl /* Base intrinsic name; TEST_VQSHL_N2 pastes the suffixes onto it.  */
      69  #define TEST_MSG "VQSHL_N/VQSHLQ_N" /* First argument of every CHECK call.  */
      70  
          /* FNNAME goes through FNNAME1 so that its argument is macro-expanded
             before the ## pasting: FNNAME (INSN) produces the definition header
             `void exec_vqshl_n (void)', which is the function main calls.  */
      71  #define FNNAME1(NAME) void exec_ ## NAME ##_n (void)
      72  #define FNNAME(NAME) FNNAME1(NAME)
      73  
      74  FNNAME (INSN) /* Expands to: void exec_vqshl_n (void).  */
      75  {
            /* Test body for vqshl_n / vqshlq_n.  Two rounds are run over all
               sixteen integer vector variants: first on inputs loaded from
               `buffer', checked against the `expected' tables, then on inputs
               filled with the per-type maximum value, checked against the
               `expected_max' tables.  */

      76    /* Basic test: v2=vqshl_n(v1,v), then store the result.  */
            /* V is the immediate shift amount.  CMT is accepted but not used in
               the expansion.  */
      77  #define TEST_VQSHL_N2(INSN, Q, T1, T2, W, N, V, CMT) \
      78    Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N));		\
      79    VECT_VAR(vector_res, T1, W, N) =					\
      80      INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N),			\
      81  			V);						\
      82    vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),				\
      83  		    VECT_VAR(vector_res, T1, W, N))
      84  
      85    /* Two auxiliary macros are necessary to expand INSN */
            /* Arguments are forwarded positionally, so the parameter names shift
               by one: (T3, Q, T1, T2, W, N) of TEST_VQSHL_N and TEST_VQSHL_N1 are
               received by TEST_VQSHL_N2 as (Q, T1, T2, W, N, V).  */
      86  #define TEST_VQSHL_N1(INSN, T3, Q, T1, T2, W, N, CMT) \
      87    TEST_VQSHL_N2(INSN, T3, Q, T1, T2, W, N, CMT)
      88  
      89  #define TEST_VQSHL_N(T3, Q, T1, T2, W, N, CMT)	\
      90    TEST_VQSHL_N1(INSN, T3, Q, T1, T2, W, N, CMT)
      91  
      92    DECL_VARIABLE_ALL_VARIANTS(vector);
      93    DECL_VARIABLE_ALL_VARIANTS(vector_res);
      94  
      95    clean_results ();
      96  
      97    TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
      98  
      99    /* Choose shift amount arbitrarily (the argument just before CMT).  */
     100  #define CMT ""
     101    TEST_VQSHL_N(, int, s, 8, 8, 2, CMT);
     102    TEST_VQSHL_N(, int, s, 16, 4, 1, CMT);
     103    TEST_VQSHL_N(, int, s, 32, 2, 1, CMT);
     104    TEST_VQSHL_N(, int, s, 64, 1, 2, CMT);
     105    TEST_VQSHL_N(, uint, u, 8, 8, 3, CMT);
     106    TEST_VQSHL_N(, uint, u, 16, 4, 2, CMT);
     107    TEST_VQSHL_N(, uint, u, 32, 2, 3, CMT);
     108    TEST_VQSHL_N(, uint, u, 64, 1, 3, CMT);
     109  
     110    TEST_VQSHL_N(q, int, s, 8, 16, 2, CMT);
     111    TEST_VQSHL_N(q, int, s, 16, 8, 1, CMT);
     112    TEST_VQSHL_N(q, int, s, 32, 4, 1, CMT);
     113    TEST_VQSHL_N(q, int, s, 64, 2, 2, CMT);
     114    TEST_VQSHL_N(q, uint, u, 8, 16, 3, CMT);
     115    TEST_VQSHL_N(q, uint, u, 16, 8, 2, CMT);
     116    TEST_VQSHL_N(q, uint, u, 32, 4, 3, CMT);
     117    TEST_VQSHL_N(q, uint, u, 64, 2, 3, CMT);
     118  
            /* Compare the stored results of the first round with `expected'.  */
     119    CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, CMT);
     120    CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
     121    CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
     122    CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, CMT);
     123    CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
     124    CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
     125    CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
     126    CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT);
     127    CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT);
     128    CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT);
     129    CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT);
     130    CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, CMT);
     131    CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT);
     132    CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT);
     133    CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT);
     134    CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT);
     135  
     136  
     137    /* Fill input vector with max value, to check saturation on limits.  */
     138    VDUP(vector, , int, s, 8, 8, 0x7F);
     139    VDUP(vector, , int, s, 16, 4, 0x7FFF);
     140    VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF);
     141    VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL);
     142    VDUP(vector, , uint, u, 8, 8, 0xFF);
     143    VDUP(vector, , uint, u, 16, 4, 0xFFFF);
     144    VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF);
     145    VDUP(vector, , uint, u, 64, 1, 0xFFFFFFFFFFFFFFFFULL);
     146    VDUP(vector, q, int, s, 8, 16, 0x7F);
     147    VDUP(vector, q, int, s, 16, 8, 0x7FFF);
     148    VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF);
     149    VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL);
     150    VDUP(vector, q, uint, u, 8, 16, 0xFF);
     151    VDUP(vector, q, uint, u, 16, 8, 0xFFFF);
     152    VDUP(vector, q, uint, u, 32, 4, 0xFFFFFFFF);
     153    VDUP(vector, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL);
     154  
            /* Second round: same shift amounts as above, applied to the
               max-value inputs; CMT is redefined to label this round.  */
     155  #undef CMT
     156  #define CMT " (with max input)"
     157    TEST_VQSHL_N(, int, s, 8, 8, 2, CMT);
     158    TEST_VQSHL_N(, int, s, 16, 4, 1, CMT);
     159    TEST_VQSHL_N(, int, s, 32, 2, 1, CMT);
     160    TEST_VQSHL_N(, int, s, 64, 1, 2, CMT);
     161    TEST_VQSHL_N(, uint, u, 8, 8, 3, CMT);
     162    TEST_VQSHL_N(, uint, u, 16, 4, 2, CMT);
     163    TEST_VQSHL_N(, uint, u, 32, 2, 3, CMT);
     164    TEST_VQSHL_N(, uint, u, 64, 1, 3, CMT);
     165  
     166    TEST_VQSHL_N(q, int, s, 8, 16, 2, CMT);
     167    TEST_VQSHL_N(q, int, s, 16, 8, 1, CMT);
     168    TEST_VQSHL_N(q, int, s, 32, 4, 1, CMT);
     169    TEST_VQSHL_N(q, int, s, 64, 2, 2, CMT);
     170    TEST_VQSHL_N(q, uint, u, 8, 16, 3, CMT);
     171    TEST_VQSHL_N(q, uint, u, 16, 8, 2, CMT);
     172    TEST_VQSHL_N(q, uint, u, 32, 4, 3, CMT);
     173    TEST_VQSHL_N(q, uint, u, 64, 2, 3, CMT);
     174  
            /* Compare the stored results of the second round with
               `expected_max'.  */
     175    CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_max, CMT);
     176    CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_max, CMT);
     177    CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_max, CMT);
     178    CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_max, CMT);
     179    CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_max, CMT);
     180    CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max, CMT);
     181    CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max, CMT);
     182    CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max, CMT);
     183    CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max, CMT);
     184    CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max, CMT);
     185    CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max, CMT);
     186    CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max, CMT);
     187    CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max, CMT);
     188    CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max, CMT);
     189    CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max, CMT);
     190    CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max, CMT);
     191  }
     192  
     193  int main (void)
     194  {
            /* Run the test function generated by FNNAME (INSN), then return 0
               unconditionally.  NOTE(review): how a failed CHECK is reported is
               decided by the CHECK macro, which is not defined in this file.  */
     195    exec_vqshl_n ();
     196    return 0;
     197  }