(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.target/
powerpc/
mma-single-test.c
       1  /* { dg-do run { target { power10_hw } } } */
       2  /* { dg-do link { target { ! power10_hw } } } */
       3  /* { dg-require-effective-target power10_ok } */
       4  /* { dg-require-effective-target ppc_mma_hw } */
       5  /* { dg-options "-mdejagnu-cpu=power10 -O2" } */
       6  
       7  #include <stdio.h>
       8  #include <stdlib.h>
       9  #include <altivec.h>
      10  
      11  typedef unsigned char vec_t __attribute__ ((vector_size (16)));
      12  typedef float v4sf_t __attribute__ ((vector_size (16)));
      13  #define SAVE_ACC(ACC, ldc,J)  \
      14  	  __builtin_mma_disassemble_acc (result, ACC); \
      15  	  rowC = (v4sf_t *) &CO[0*ldc+J]; \
      16            rowC[0] += result[0]; \
      17            rowC = (v4sf_t *) &CO[1*ldc+J]; \
      18            rowC[0] += result[1]; \
      19            rowC = (v4sf_t *) &CO[2*ldc+J]; \
      20            rowC[0] += result[2]; \
      21            rowC = (v4sf_t *) &CO[3*ldc+J]; \
      22  	  rowC[0] += result[3];
      23  
      24  #define SAVE_ACC1(ACC,ldc, J)  \
      25  	  __builtin_mma_disassemble_acc (result, ACC); \
      26  	  rowC = (v4sf_t *) &CO[4* ldc+J]; \
      27            rowC[0] += result[0]; \
      28            rowC = (v4sf_t *) &CO[5*ldc+J]; \
      29            rowC[0] += result[1]; \
      30            rowC = (v4sf_t *) &CO[6*ldc+J]; \
      31            rowC[0] += result[2]; \
      32            rowC = (v4sf_t *) &CO[7*ldc+J]; \
      33  	  rowC[0] += result[3];
      34  void
      35  MMA (int m, int n, int k, float *A, float *B, float *C)
      36  {
      37    __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
      38    v4sf_t result[4];
      39    v4sf_t *rowC;
      40    for (int l = 0; l < n; l += 8)
      41      {
      42        float *CO;
      43        float *AO;
      44        AO = A;
      45        CO = C;
      46        C += m * 8;
      47        for (int j = 0; j < m; j += 16)
      48  	{
      49  	  float *BO = B;
      50  	  __builtin_mma_xxsetaccz (&acc0);
      51  	  __builtin_mma_xxsetaccz (&acc1);
      52  	  __builtin_mma_xxsetaccz (&acc2);
      53  	  __builtin_mma_xxsetaccz (&acc3);
      54  	  __builtin_mma_xxsetaccz (&acc4);
      55  	  __builtin_mma_xxsetaccz (&acc5);
      56  	  __builtin_mma_xxsetaccz (&acc6);
      57  	  __builtin_mma_xxsetaccz (&acc7);
      58  	  unsigned long i;
      59  
      60  	  for (i = 0; i < k; i++)
      61  	    {
      62  	      vec_t *rowA = (vec_t *) & AO[i * 16];
      63  	      vec_t *rowB = (vec_t *) & BO[i * 8];
      64  	      __builtin_mma_xvf32gerpp (&acc0, rowB[0], rowA[0]);
      65  	      __builtin_mma_xvf32gerpp (&acc1, rowB[1], rowA[0]);
      66  	      __builtin_mma_xvf32gerpp (&acc2, rowB[0], rowA[1]);
      67  	      __builtin_mma_xvf32gerpp (&acc3, rowB[1], rowA[1]);
      68  	      __builtin_mma_xvf32gerpp (&acc4, rowB[0], rowA[2]);
      69  	      __builtin_mma_xvf32gerpp (&acc5, rowB[1], rowA[2]);
      70  	      __builtin_mma_xvf32gerpp (&acc6, rowB[0], rowA[3]);
      71  	      __builtin_mma_xvf32gerpp (&acc7, rowB[1], rowA[3]);
      72  	    }
      73  	  SAVE_ACC (&acc0, m, 0);
      74  	  SAVE_ACC (&acc2, m, 4);
      75  	  SAVE_ACC1 (&acc1, m, 0);
      76  	  SAVE_ACC1 (&acc3, m, 4);
      77  	  SAVE_ACC (&acc4, m, 8);
      78  	  SAVE_ACC (&acc6, m, 12);
      79  	  SAVE_ACC1 (&acc5, m, 8);
      80  	  SAVE_ACC1 (&acc7, m, 12);
      81  	  AO += k * 16;
      82  	  BO += k * 8;
      83  	  CO += 16;
      84  	}
      85        B += k * 8;
      86      }
      87  }
      88  
      89  void
      90  init (float *matrix, int row, int column)
      91  {
      92    for (int j = 0; j < column; j++)
      93      {
      94        for (int i = 0; i < row; i++)
      95  	{
      96  	  matrix[j * row + i] = (i * 16 + 2 + j) / 0.123;
      97  	}
      98      }
      99  }
     100  
     101  void
     102  init0 (float *matrix, float *matrix1, int row, int column)
     103  {
     104    for (int j = 0; j < column; j++)
     105      for (int i = 0; i < row; i++)
     106        matrix[j * row + i] = matrix1[j * row + i] = 0;
     107  }
     108  
     109  
     110  void
     111  print (const char *name, const float *matrix, int row, int column)
     112  {
     113    printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
     114    for (int i = 0; i < row; i++)
     115      {
     116        for (int j = 0; j < column; j++)
     117  	{
     118  	  printf ("%f ", matrix[j * row + i]);
     119  	}
     120        printf ("\n");
     121      }
     122    printf ("\n");
     123  }
     124  
     125  int
     126  main (int argc, char *argv[])
     127  {
     128    int rowsA, colsB, common;
     129    int i, j, k;
     130    int ret = 0;
     131  
     132    for (int t = 16; t <= 128; t += 16)
     133      {
     134        for (int t1 = 8; t1 <= 16; t1 += 8)
     135  	{
     136  	  rowsA = t;
     137  	  colsB = t1;
     138  	  common = 1;
     139  	  /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
     140  	  float A[rowsA * common];
     141  	  float B[common * colsB];
     142  	  float C[rowsA * colsB];
     143  	  float D[rowsA * colsB];
     144  
     145  
     146  	  init (A, rowsA, common);
     147  	  init (B, common, colsB);
     148  	  init0 (C, D, rowsA, colsB);
     149  	  MMA (rowsA, colsB, common, A, B, C);
     150  
     151  	  for (i = 0; i < colsB; i++)
     152  	    {
     153  	      for (j = 0; j < rowsA; j++)
     154  		{
     155  		  D[i * rowsA + j] = 0;
     156  		  for (k = 0; k < common; k++)
     157  		    {
     158  		      D[i * rowsA + j] +=
     159  			A[k * rowsA + j] * B[k + common * i];
     160  		    }
     161  		}
     162  	    }
     163  	  for (i = 0; i < colsB; i++)
     164  	    {
     165  	      for (j = 0; j < rowsA; j++)
     166  		{
     167  		  for (k = 0; k < common; k++)
     168  		    {
     169  		      if (D[i * rowsA + j] != C[i * rowsA + j])
     170  			{
     171  			  printf ("Error %d,%d,%d\n",i,j,k);
     172  			  ret++;
     173  			}
     174  		    }
     175  		}
     176  	    }
     177  	  if (ret)
     178  	    {
     179  	      print ("A", A, rowsA, common);
     180  	      print ("B", B, common, colsB);
     181  	      print ("C", C, rowsA, colsB);
     182  	      print ("D", D, rowsA, colsB);
     183  	    }
     184  	}
     185      }
     186  
     187  #ifdef VERBOSE
     188    if (ret)
     189      printf ("MMA single test fail: %d errors\n",ret);
     190    else
     191      printf ("MMA single test success: 0 MMA errors\n");
     192  #else
     193    if (ret)
     194      abort();
     195  #endif
     196        
     197    return ret;
     198  }