1  /* PR81833: This used to fail due to improper implementation of vec_msum.  */
       2  /* Test case relies on -mcpu=power7 or later.  Currently we don't have
       3     machinery to express that, so we have two separate tests for -mcpu=power7
       4     and -mcpu=power8 to catch 32-bit BE on P7 and 64-bit BE/LE on P8.  */
       5  
       6  /* { dg-do run } */
       7  /* { dg-require-effective-target p8vector_hw } */
       8  /* { dg-options "-mdejagnu-cpu=power8 -O2" } */
       9  
      10  #include <altivec.h>
      11  
      12  #define vec_u8  vector unsigned char
      13  #define vec_s8  vector signed char
      14  #define vec_u16 vector unsigned short
      15  #define vec_s16 vector signed short
      16  #define vec_u32 vector unsigned int
      17  #define vec_s32 vector signed int
      18  #define vec_f   vector float
      19  
      20  #define LOAD_ZERO const vec_u8 zerov = vec_splat_u8 (0)
      21  
      22  #define zero_u8v  (vec_u8)  zerov
      23  #define zero_s8v  (vec_s8)  zerov
      24  #define zero_u16v (vec_u16) zerov
      25  #define zero_s16v (vec_s16) zerov
      26  #define zero_u32v (vec_u32) zerov
      27  #define zero_s32v (vec_s32) zerov
      28  
      29  signed int __attribute__((noinline))
      30  scalarproduct_int16_vsx (const signed short *v1, const signed short *v2,
      31  			 int order)
      32  {
      33    int i;
      34    LOAD_ZERO;
      35    register vec_s16 vec1;
      36    register vec_s32 res = vec_splat_s32 (0), t;
      37    signed int ires;
      38  
      39    for (i = 0; i < order; i += 8) {
      40      vec1 = vec_vsx_ld (0, v1);
      41      t    = vec_msum (vec1, vec_vsx_ld (0, v2), zero_s32v);
      42      res  = vec_sums (t, res);
      43      v1  += 8;
      44      v2  += 8;
      45    }
      46    res = vec_splat (res, 3);
      47    vec_ste (res, 0, &ires);
      48  
      49    return ires;
      50  }
      51  
      52  int main(void)
      53  {
      54    const signed short test_vec[] = { 1, 1, 1, 1, 1, 1, 1, 1 };
      55    if (scalarproduct_int16_vsx (test_vec, test_vec, 8) != 8)
      56      __builtin_abort ();
      57    return 0;
      58  }