(root)/
gcc-13.2.0/
gcc/
testsuite/
gcc.dg/
unroll-and-jam.c
       1  /* { dg-do run } */
       2  /* { dg-options "-O3 -floop-unroll-and-jam -fno-tree-loop-im --param unroll-jam-min-percent=0 -fdump-tree-unrolljam-details" } */
       3  /* { dg-additional-options "--param max-completely-peel-times=16" { target { s390*-*-* } } } */
       4  /* { dg-require-effective-target int32plus } */
       5  
       6  #include <stdio.h>
/* Work arrays used by the tests.  They are only declared here; their
   definitions, which also fix the array sizes, appear later in this
   file.  */
extern unsigned int a[];
extern unsigned int b[];
extern unsigned int aa[][1024];
/* Running checksum, updated by checkaa() and checkb().  */
unsigned int checksum;
      11  void checkaa(void)
      12  {
      13    unsigned sum = 1;
      14    unsigned long i, j;
      15    for (i = 0; i < 1024; i++) {
      16        for (j = 0; j < 16; j++) {
      17  	  sum += aa[j][i]*31+47;
      18        }
      19    }
      20    checksum = checksum * 27 + sum;
      21    //printf("  %d\n", sum);
      22  }
      23  
      24  void checkb(void)
      25  {
      26    unsigned sum = 1;
      27    unsigned long i, j;
      28    for (i = 0; i < 1024; i++) {
      29        sum += b[i]*31+47;
      30    }
      31    checksum = checksum * 27 + sum;
      32    //printf("  %d\n", sum);
      33  }
      34  
      35  #define TEST(name, body, test) \
      36  static void __attribute__((noinline,noclone)) name (unsigned long n, unsigned long m) \
      37  { \
      38    unsigned i, j; \
      39    for (i = 1; i < m; i++) { \
      40        for (j = 1; j < n; j++) { \
      41  	  body; \
      42        } \
      43    } \
      44    test; \
      45  } \
      46  static void __attribute__((noinline,noclone,optimize("O1"))) name ## noopt (unsigned long n, unsigned long m) \
      47  { \
      48    unsigned long i, j; \
      49    for (i = 1; i < m; i++) { \
      50        for (j = 1; j < n; j++) { \
      51  	  body; \
      52        } \
      53    } \
      54    test; \
      55  }
/* Test cases.  Each line defines an optimized function and its *noopt
   reference via TEST.  The trailing note records the author's expectation:
   "ok" / "notok", followed by a pair of numbers -- presumably the
   dependence distance vector of the loop body (TODO confirm component
   order and sign convention).  Seven lines are marked "ok", which matches
   the "six ... (actually seven)" count stated next to the dg-final scan at
   the end of this file.  */
TEST(foo1, aa[i+1][j+1]=aa[i][j] * aa[i][j] / 2, checkaa()) // ok, -1,-1
TEST(foo2, aa[i][j+1]=3*aa[i+1][j], checkaa()) // notok, 1,-1
TEST(foo3, aa[i+1][j-1]=aa[i][j] * aa[i][j] / 2, checkaa()) // notok, -1,1
TEST(foo4, aa[i][j] = aa[i-1][j+1] * aa[i-1][j+1] / 2, checkaa()) // notok, -1,1
TEST(foo5, aa[i][j] = aa[i+1][j+1] * aa[i+1][j+1] / 2, checkaa()) // ok, 1,1
TEST(foo6, aa[i][j] = aa[i+1][j] * aa[i+1][j] / 2, checkaa()) // ok, -1,0
TEST(foo61, aa[i][0] = aa[i+1][0] * aa[i+1][0] / 2, checkaa()) // notok, -1,0
TEST(foo62, aa[i][j/2] = aa[i+1][j/2] * aa[i+1][j/2] / 2, checkaa()) // notok, not affine
TEST(foo63, aa[i][j%2] = aa[i+1][j%2] * aa[i+1][j%2] / 2, checkaa()) // notok, not affine
TEST(foo7, aa[i+1][j] = aa[i][j] * aa[i][j] / 2, checkaa()) // ok, 1,0
TEST(foo9, b[j] = 3*b[j+1] + 1, checkb()) // notok, 0,-1
TEST(foo10, b[j] = 3*b[j] + 1, checkb()) // ok, 0,0
/* f is assigned by the body of foo11 below.  */
extern int f;
TEST(foo11, f = b[i-1] = 1 + 3* b[i+1], checkb()) // ok, 2,0, but the unroll factor must be reduced to 2 (unroll-by-3, which profitability would suggest, would be incorrect)

/* foo8 should work as well, but currently doesn't because the distance
   vectors we compute are too pessimistic.  We compute
     (0,1), (1,1) and (1,-1)
   and the last one causes us to lose.  */
TEST(foo8, b[j+1] = 3*b[j] + 1, checkb()) // ok in principle, 0,1 (but see the comment above)
      76  
/* Definitions of the objects declared extern earlier in this file.  */
int f;				/* Assigned by the body of foo11.  */
unsigned int a[1024];		/* Filled by init().  */
unsigned int b[1024];		/* Filled by init(); summed by checkb().  */
unsigned int aa[16][1024];	/* Filled by init(); summed by checkaa().  */
      81  void init(void)
      82  {
      83    unsigned long i,j;
      84    for (i = 0; i < 1024; i++) {
      85        for (j = 0; j < 16; j++) {
      86  	  aa[j][i] = ((j+1)*2+i+1) % 17;
      87        }
      88        a[i] = ((i+1)*31) % 19;
      89        b[i] = ((i+1)*47) % 23;
      90    }
      91    checksum = 1;
      92  }
      93  
      94  #define RUN(name) \
      95      printf(" %s\n", #name); \
      96      init();for(i=0;i<4;i++)name##noopt(32,8); checka = checksum; \
      97      init();for(i=0;i<4;i++)name(32,8); \
      98      if (checka != checksum) fail = 1; \
      99      printf("%sok %s\n", checka != checksum ? "NOT " : "", #name);
     100  
     101  int main()
     102  {
     103    int fail = 0;
     104    int i;
     105    unsigned checka;
     106    RUN(foo1);
     107    RUN(foo2);
     108    RUN(foo3);
     109    RUN(foo4);
     110    RUN(foo5);
     111    RUN(foo6);
     112    RUN(foo61);
     113    RUN(foo62);
     114    RUN(foo63);
     115    RUN(foo7);
     116    RUN(foo8);
     117    RUN(foo9);
     118    RUN(foo10);
     119    RUN(foo11);
     120    if (fail)
     121      __builtin_abort();
     122    return fail;
     123  }
     124  
     125  /* Six loops should be unroll-jammed (actually seven, but see above).  */
     126  /* { dg-final { scan-tree-dump-times "applying unroll and jam" 6 "unrolljam" } } */