1  #include <arm_neon.h>
       2  #include "arm-neon-ref.h"
       3  #include "compute-ref-data.h"
       4  
       5  /* Expected results with negative input.  */
       6  VECT_VAR_DECL(expected_neg,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
       7  					    0x0, 0x0, 0x0, 0x0 };
       8  VECT_VAR_DECL(expected_neg,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
       9  VECT_VAR_DECL(expected_neg,uint,32,2) [] = { 0x0, 0x0 };
      10  VECT_VAR_DECL(expected_neg,uint,64,1) [] = { 0x0 };
      11  VECT_VAR_DECL(expected_neg,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
      12  					     0x0, 0x0, 0x0, 0x0,
      13  					     0x0, 0x0, 0x0, 0x0,
      14  					     0x0, 0x0, 0x0, 0x0 };
      15  VECT_VAR_DECL(expected_neg,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
      16  					     0x0, 0x0, 0x0, 0x0 };
      17  VECT_VAR_DECL(expected_neg,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
      18  VECT_VAR_DECL(expected_neg,uint,64,2) [] = { 0x0, 0x0 };
      19  
      20  /* Expected results with shift by 1.  */
      21  VECT_VAR_DECL(expected_sh1,uint,8,8) [] = { 0xfe, 0xfe, 0xfe, 0xfe,
      22  					    0xfe, 0xfe, 0xfe, 0xfe };
      23  VECT_VAR_DECL(expected_sh1,uint,16,4) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe };
      24  VECT_VAR_DECL(expected_sh1,uint,32,2) [] = { 0xfffffffe, 0xfffffffe };
      25  VECT_VAR_DECL(expected_sh1,uint,64,1) [] = { 0xfffffffffffffffe };
      26  VECT_VAR_DECL(expected_sh1,uint,8,16) [] = { 0xfe, 0xfe, 0xfe, 0xfe,
      27  					     0xfe, 0xfe, 0xfe, 0xfe,
      28  					     0xfe, 0xfe, 0xfe, 0xfe,
      29  					     0xfe, 0xfe, 0xfe, 0xfe };
      30  VECT_VAR_DECL(expected_sh1,uint,16,8) [] = { 0xfffe, 0xfffe, 0xfffe, 0xfffe,
      31  					     0xfffe, 0xfffe, 0xfffe, 0xfffe };
      32  VECT_VAR_DECL(expected_sh1,uint,32,4) [] = { 0xfffffffe, 0xfffffffe,
      33  					     0xfffffffe, 0xfffffffe };
      34  VECT_VAR_DECL(expected_sh1,uint,64,2) [] = { 0xfffffffffffffffe,
      35  					     0xfffffffffffffffe };
      36  
      37  /* Expected results with shift by 2.  */
      38  VECT_VAR_DECL(expected_sh2,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
      39  					    0xff, 0xff, 0xff, 0xff };
      40  VECT_VAR_DECL(expected_sh2,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
      41  VECT_VAR_DECL(expected_sh2,uint,32,2) [] = { 0xffffffff, 0xffffffff };
      42  VECT_VAR_DECL(expected_sh2,uint,64,1) [] = { 0xffffffffffffffff };
      43  VECT_VAR_DECL(expected_sh2,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
      44  					     0xff, 0xff, 0xff, 0xff,
      45  					     0xff, 0xff, 0xff, 0xff,
      46  					     0xff, 0xff, 0xff, 0xff };
      47  VECT_VAR_DECL(expected_sh2,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
      48  					     0xffff, 0xffff, 0xffff, 0xffff };
      49  VECT_VAR_DECL(expected_sh2,uint,32,4) [] = { 0xffffffff, 0xffffffff,
      50  					     0xffffffff, 0xffffffff };
      51  VECT_VAR_DECL(expected_sh2,uint,64,2) [] = { 0xffffffffffffffff,
      52  					     0xffffffffffffffff };
      53  
      54  /* Expected results.  */
      55  VECT_VAR_DECL(expected,uint,8,8) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 };
      56  VECT_VAR_DECL(expected,uint,16,4) [] = { 0x8, 0x8, 0x8, 0x8 };
      57  VECT_VAR_DECL(expected,uint,32,2) [] = { 0x18, 0x18 };
      58  VECT_VAR_DECL(expected,uint,64,1) [] = { 0x40 };
      59  VECT_VAR_DECL(expected,uint,8,16) [] = { 0xa0, 0xa0, 0xa0, 0xa0,
      60  					 0xa0, 0xa0, 0xa0, 0xa0,
      61  					 0xa0, 0xa0, 0xa0, 0xa0,
      62  					 0xa0, 0xa0, 0xa0, 0xa0 };
      63  VECT_VAR_DECL(expected,uint,16,8) [] = { 0x180, 0x180, 0x180, 0x180,
      64  					 0x180, 0x180, 0x180, 0x180 };
      65  VECT_VAR_DECL(expected,uint,32,4) [] = { 0x380, 0x380, 0x380, 0x380 };
      66  VECT_VAR_DECL(expected,uint,64,2) [] = { 0x800, 0x800 };
      67  
      68  
      69  #define INSN vqshlu
      70  #define TEST_MSG "VQSHLU_N/VQSHLUQ_N"
      71  
      72  #define FNNAME1(NAME) void exec_ ## NAME ## _n(void)
      73  #define FNNAME(NAME) FNNAME1(NAME)
      74  
      75  FNNAME (INSN)
      76  {
      77    /* Basic test: v2=vqshlu_n(v1,v), then store the result.  */
      78  #define TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V, CMT) \
      79    Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T3, W, N));		\
      80    VECT_VAR(vector_res, T3, W, N) =					\
      81      INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N),			\
      82  			V);						\
      83    vst1##Q##_##T4##W(VECT_VAR(result, T3, W, N),				\
      84  		    VECT_VAR(vector_res, T3, W, N))
      85  
      86    /* Two auxliary macros are necessary to expand INSN */
      87  #define TEST_VQSHLU_N1(INSN, Q, T1, T2, T3, T4, W, N, V, CMT) \
      88    TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V, CMT)
      89  
      90  #define TEST_VQSHLU_N(Q, T1, T2, T3, T4, W, N, V, CMT) \
      91    TEST_VQSHLU_N1(INSN, Q, T1, T2, T3, T4, W, N, V, CMT)
      92  
      93  
      94    DECL_VARIABLE_ALL_VARIANTS(vector);
      95    DECL_VARIABLE_ALL_VARIANTS(vector_res);
      96  
      97    clean_results ();
      98  
      99    /* Fill input vector with negative values, to check saturation on
     100       limits.  */
     101    VDUP(vector, , int, s, 8, 8, -1);
     102    VDUP(vector, , int, s, 16, 4, -2);
     103    VDUP(vector, , int, s, 32, 2, -3);
     104    VDUP(vector, , int, s, 64, 1, -4);
     105    VDUP(vector, q, int, s, 8, 16, -1);
     106    VDUP(vector, q, int, s, 16, 8, -2);
     107    VDUP(vector, q, int, s, 32, 4, -3);
     108    VDUP(vector, q, int, s, 64, 2, -4);
     109  
     110    /* Choose shift amount arbitrarily.  */
     111  #define CMT " (negative input)"
     112    TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 2, CMT);
     113    TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 1, CMT);
     114    TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 1, CMT);
     115    TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 2, CMT);
     116    TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 2, CMT);
     117    TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 1, CMT);
     118    TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 1, CMT);
     119    TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 2, CMT);
     120  
     121    CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_neg, CMT);
     122    CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_neg, CMT);
     123    CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_neg, CMT);
     124    CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_neg, CMT);
     125    CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg, CMT);
     126    CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg, CMT);
     127    CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg, CMT);
     128    CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_neg, CMT);
     129  
     130    
     131    /* Fill input vector with max value, to check saturation on
     132       limits.  */
     133    VDUP(vector, , int, s, 8, 8, 0x7F);
     134    VDUP(vector, , int, s, 16, 4, 0x7FFF);
     135    VDUP(vector, , int, s, 32, 2, 0x7FFFFFFF);
     136    VDUP(vector, , int, s, 64, 1, 0x7FFFFFFFFFFFFFFFLL);
     137    VDUP(vector, q, int, s, 8, 16, 0x7F);
     138    VDUP(vector, q, int, s, 16, 8, 0x7FFF);
     139    VDUP(vector, q, int, s, 32, 4, 0x7FFFFFFF);
     140    VDUP(vector, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFULL);
     141  
     142    /* shift by 1.  */
     143  #undef CMT
     144  #define CMT " (shift by 1)"
     145    TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 1, CMT);
     146    TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 1, CMT);
     147    TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 1, CMT);
     148    TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 1, CMT);
     149    TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 1, CMT);
     150    TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 1, CMT);
     151    TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 1, CMT);
     152    TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 1, CMT);
     153  
     154    CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh1, CMT);
     155    CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh1, CMT);
     156    CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh1, CMT);
     157    CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_sh1, CMT);
     158    CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_sh1, CMT);
     159    CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_sh1, CMT);
     160    CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_sh1, CMT);
     161    CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_sh1, CMT);
     162  
     163    /* shift by 2 to force saturation.  */
     164  #undef CMT
     165  #define CMT " (shift by 2)"
     166    TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 2, CMT);
     167    TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 2, CMT);
     168    TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 2, CMT);
     169    TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 2, CMT);
     170    TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 2, CMT);
     171    TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 2, CMT);
     172    TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 2, CMT);
     173    TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 2, CMT);
     174  
     175    CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_sh2, CMT);
     176    CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_sh2, CMT);
     177    CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_sh2, CMT);
     178    CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_sh2, CMT);
     179    CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_sh2, CMT);
     180    CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_sh2, CMT);
     181    CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_sh2, CMT);
     182    CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_sh2, CMT);
     183  
     184    
     185    /* Fill input vector with positive values, to check normal case.  */
     186    VDUP(vector, , int, s, 8, 8, 1);
     187    VDUP(vector, , int, s, 16, 4, 2);
     188    VDUP(vector, , int, s, 32, 2, 3);
     189    VDUP(vector, , int, s, 64, 1, 4);
     190    VDUP(vector, q, int, s, 8, 16, 5);
     191    VDUP(vector, q, int, s, 16, 8, 6);
     192    VDUP(vector, q, int, s, 32, 4, 7);
     193    VDUP(vector, q, int, s, 64, 2, 8);
     194  
     195    /* Arbitrary shift amount.  */
     196  #undef CMT
     197  #define CMT ""
     198    TEST_VQSHLU_N(, int, s, uint, u, 8, 8, 1, CMT);
     199    TEST_VQSHLU_N(, int, s, uint, u, 16, 4, 2, CMT);
     200    TEST_VQSHLU_N(, int, s, uint, u, 32, 2, 3, CMT);
     201    TEST_VQSHLU_N(, int, s, uint, u, 64, 1, 4, CMT);
     202    TEST_VQSHLU_N(q, int, s, uint, u, 8, 16, 5, CMT);
     203    TEST_VQSHLU_N(q, int, s, uint, u, 16, 8, 6, CMT);
     204    TEST_VQSHLU_N(q, int, s, uint, u, 32, 4, 7, CMT);
     205    TEST_VQSHLU_N(q, int, s, uint, u, 64, 2, 8, CMT);
     206  
     207    CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, CMT);
     208    CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, CMT);
     209    CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT);
     210    CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, CMT);
     211    CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT);
     212    CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT);
     213    CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT);
     214    CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, CMT);
     215  }
     216  
     217  int main (void)
     218  {
     219    exec_vqshlu_n ();
     220    return 0;
     221  }