1  // { dg-do run }
       2  // { dg-options "-std=c99 -O3" }
       3  
       4  #include <stdlib.h>
       5  #include <stdbool.h>
       6  
// Number of scalar elements allocated per buffer in testF/testD; it is also
// passed to every kernel as `tas`, so each kernel handles COUNT/2 pairs.
#define COUNT 1000
// Exclusive upper bound of the random inputs (rand() % MAX) used by the tests.
#define MAX 1000
// NOTE(review): ALIGNMENT is not referenced in the visible part of this file.
#define ALIGNMENT (2*1024*1024) // 2MB

// Hand-written prototypes for the C99 conjugate functions; the visible
// includes are only <stdlib.h> and <stdbool.h>, not <complex.h>.
_Complex double conj(_Complex double);
_Complex float conjf(_Complex float);

// NOTE(review): `device` is not referenced in the visible part of this file.
unsigned int device = 0;
      15  
      16  // cmul
      17  
      18  void cmulF(float *td, float *te, float *tf, float *tg, int tas)
      19  {
      20    typedef _Complex float complexT;
      21    int array_size = tas/2;
      22    complexT *d = (complexT*)(td);
      23    complexT *e = (complexT*)(te);
      24    complexT *f = (complexT*)(tf);
      25    for (int i = 0; i < array_size; i++)
      26      {
      27        d[i] = e[i] * f[i];
      28      }
      29  }
      30  
      31  __attribute__((optimize("no-tree-vectorize")))
      32  bool cmulFcheck(float *td, float *te, float *tf, float *tg, int tas)
      33  {
      34    for (int i = 0; i < tas/2; ++i)
      35      {
      36        float a = te[i*2];
      37        float b = te[i*2+1];
      38        float c = tf[i*2];
      39        float d = tf[i*2+1];
      40        if (td[i*2] != a*c-b*d || td[i*2+1] != a*d+b*c)
      41          return false;
      42      }
      43    return true;
      44  }
      45  
      46  void cmulD(double *td, double *te, double *tf, double *tg, int tas)
      47  {
      48    typedef _Complex double complexT;
      49    int array_size = tas/2;
      50    complexT *d = (complexT*)(td);
      51    complexT *e = (complexT*)(te);
      52    complexT *f = (complexT*)(tf);
      53    for (int i = 0; i < array_size; i++)
      54      {
      55        d[i] = e[i] * f[i];
      56      }
      57  }
      58  
      59  __attribute__((optimize("no-tree-vectorize")))
      60  bool cmulDcheck(double *td, double *te, double *tf, double *tg, int tas)
      61  {
      62    for (int i = 0; i < tas/2; ++i)
      63      {
      64        double a = te[i*2];
      65        double b = te[i*2+1];
      66        double c = tf[i*2];
      67        double d = tf[i*2+1];
      68        if (td[i*2] != a*c-b*d || td[i*2+1] != a*d+b*c)
      69          return false;
      70      }
      71    return true;
      72  }
      73  
      74  
      75  // cmul_conj
      76  
      77  void cmul_conjF(float *td, float *te, float *tf, float *tg, int tas)
      78  {
      79    typedef _Complex float complexT;
      80    int array_size = tas/2;
      81    complexT *d = (complexT*)(td);
      82    complexT *e = (complexT*)(te);
      83    complexT *f = (complexT*)(tf);
      84    for (int i = 0; i < array_size; i++)
      85      {
      86        d[i] = e[i] * conj(f[i]);
      87      }
      88  }
      89  
      90  __attribute__((optimize("no-tree-vectorize")))
      91  bool cmul_conjFcheck(float *td, float *te, float *tf, float *tg, int tas)
      92  {
      93    for (int i = 0; i < tas/2; ++i)
      94      {
      95        float a = te[i*2];
      96        float b = te[i*2+1];
      97        float c = tf[i*2];
      98        float d = tf[i*2+1];
      99        if (td[i*2] != a*c+b*d || td[i*2+1] != b*c-a*d)
     100          return false;
     101      }
     102    return true;
     103  }
     104  
     105  void cmul_conjD(double *td, double *te, double *tf, double *tg, int tas)
     106  {
     107    typedef _Complex double complexT;
     108    int array_size = tas/2;
     109    complexT *d = (complexT*)(td);
     110    complexT *e = (complexT*)(te);
     111    complexT *f = (complexT*)(tf);
     112    for (int i = 0; i < array_size; i++)
     113      {
     114        d[i] = e[i] * conj(f[i]);
     115      }
     116  }
     117  
     118  __attribute__((optimize("no-tree-vectorize")))
     119  bool cmul_conjDcheck(double *td, double *te, double *tf, double *tg, int tas)
     120  {
     121    for (int i = 0; i < tas/2; ++i)
     122      {
     123        double a = te[i*2];
     124        double b = te[i*2+1];
     125        double c = tf[i*2];
     126        double d = tf[i*2+1];
     127        if (td[i*2] != a*c+b*d || td[i*2+1] != b*c-a*d)
     128          return false;
     129      }
     130    return true;
     131  }
     132  
     133  
     134  // addsub
     135  
     136  void addsubF(float *td, float *te, float *tf, float *tg, int tas)
     137  {
     138    typedef _Complex float complexT;
     139    int array_size = tas/2;
     140    complexT *d = (complexT*)(td);
     141    complexT *e = (complexT*)(te);
     142    complexT *f = (complexT*)(tf);
     143    for (int i = 0; i < array_size; i++)
     144      {
     145        d[i] = e[i] - conjf(f[i]);
     146      }
     147  }
     148  
     149  __attribute__((optimize("no-tree-vectorize")))
     150  bool addsubFcheck(float *td, float *te, float *tf, float *tg, int tas)
     151  {
     152    for (int i = 0; i < tas/2; ++i)
     153      {
     154        float a = te[i*2];
     155        float b = te[i*2+1];
     156        float c = tf[i*2];
     157        float d = tf[i*2+1];
     158        if (td[i*2] != a-c || td[i*2+1] != b+d)
     159          return false;
     160      }
     161    return true;
     162  }
     163  
     164  void addsubD(double *td, double *te, double *tf, double *tg, int tas)
     165  {
     166    typedef _Complex double complexT;
     167    int array_size = tas/2;
     168    complexT *d = (complexT*)(td);
     169    complexT *e = (complexT*)(te);
     170    complexT *f = (complexT*)(tf);
     171    for (int i = 0; i < array_size; i++)
     172      {
     173        d[i] = e[i] - conj(f[i]);
     174      }
     175  }
     176  
     177  __attribute__((optimize("no-tree-vectorize")))
     178  bool addsubDcheck(double *td, double *te, double *tf, double *tg, int tas)
     179  {
     180    for (int i = 0; i < tas/2; ++i)
     181      {
     182        double a = te[i*2];
     183        double b = te[i*2+1];
     184        double c = tf[i*2];
     185        double d = tf[i*2+1];
     186        if (td[i*2] != a-c || td[i*2+1] != b+d)
     187          return false;
     188      }
     189    return true;
     190  }
     191  
     192  
     193  // fmaddsub
     194  
     195  void fmaddsubF(float *td, float *te, float *tf, float *tg, int tas)
     196  {
     197    int array_size = tas/2;
     198    for (int i = 0; i < array_size; i++)
     199      {
     200        td[i*2] = te[i*2]*tf[i*2]-tg[i*2];
     201        td[i*2+1] = te[i*2+1]*tf[i*2+1]+tg[i*2+1];
     202      }
     203  }
     204  
     205  __attribute__((optimize("no-tree-vectorize")))
     206  bool fmaddsubFcheck(float *td, float *te, float *tf, float *tg, int tas)
     207  {
     208    for (int i = 0; i < tas/2; ++i)
     209      {
     210        float a = te[i*2];
     211        float b = te[i*2+1];
     212        float c = tf[i*2];
     213        float d = tf[i*2+1];
     214        float e = tg[i*2];
     215        float f = tg[i*2+1];
     216        if (td[i*2] != a*c-e || td[i*2+1] != b*d+f)
     217          return false;
     218      }
     219    return true;
     220  }
     221  
     222  void fmaddsubD(double *td, double *te, double *tf, double *tg, int tas)
     223  {
     224    int array_size = tas/2;
     225    for (int i = 0; i < array_size; i++)
     226      {
     227        td[i*2] = te[i*2]*tf[i*2]-tg[i*2];
     228        td[i*2+1] = te[i*2+1]*tf[i*2+1]+tg[i*2+1];
     229      }
     230  }
     231  
     232  __attribute__((optimize("no-tree-vectorize")))
     233  bool fmaddsubDcheck(double *td, double *te, double *tf, double *tg, int tas)
     234  {
     235    for (int i = 0; i < tas/2; ++i)
     236      {
     237        double a = te[i*2];
     238        double b = te[i*2+1];
     239        double c = tf[i*2];
     240        double d = tf[i*2+1];
     241        double e = tg[i*2];
     242        double f = tg[i*2+1];
     243        if (td[i*2] != a*c-e || td[i*2+1] != b*d+f)
     244          return false;
     245      }
     246    return true;
     247  }
     248  
     249  
     250  // fmsubadd
     251  
     252  void fmsubaddF(float *td, float *te, float *tf, float *tg, int tas)
     253  {
     254    int array_size = tas/2;
     255    for (int i = 0; i < array_size; i++)
     256      {
     257        td[i*2] = te[i*2]*tf[i*2]+tg[i*2];
     258        td[i*2+1] = te[i*2+1]*tf[i*2+1]-tg[i*2+1];
     259      }
     260  }
     261  
     262  __attribute__((optimize("no-tree-vectorize")))
     263  bool fmsubaddFcheck(float *td, float *te, float *tf, float *tg, int tas)
     264  {
     265    for (int i = 0; i < tas/2; ++i)
     266      {
     267        float a = te[i*2];
     268        float b = te[i*2+1];
     269        float c = tf[i*2];
     270        float d = tf[i*2+1];
     271        float e = tg[i*2];
     272        float f = tg[i*2+1];
     273        if (td[i*2] != a*c+e || td[i*2+1] != b*d-f)
     274          return false;
     275      }
     276    return true;
     277  }
     278  
     279  void fmsubaddD(double *td, double *te, double *tf, double *tg, int tas)
     280  {
     281    int array_size = tas/2;
     282    for (int i = 0; i < array_size; i++)
     283      {
     284        td[i*2] = te[i*2]*tf[i*2]+tg[i*2];
     285        td[i*2+1] = te[i*2+1]*tf[i*2+1]-tg[i*2+1];
     286      }
     287  }
     288  
     289  __attribute__((optimize("no-tree-vectorize")))
     290  bool fmsubaddDcheck(double *td, double *te, double *tf, double *tg, int tas)
     291  {
     292    for (int i = 0; i < tas/2; ++i)
     293      {
     294        double a = te[i*2];
     295        double b = te[i*2+1];
     296        double c = tf[i*2];
     297        double d = tf[i*2+1];
     298        double e = tg[i*2];
     299        double f = tg[i*2+1];
     300        if (td[i*2] != a*c+e || td[i*2+1] != b*d-f)
     301          return false;
     302      }
     303    return true;
     304  }
     305  
     306  
     307  // cadd90
     308  
     309  void cadd90F(float *td, float *te, float *tf, float *tg, int tas)
     310  {
     311    int array_size = tas/2;
     312    for (int i = 0; i < array_size; i++)
     313      {
     314        td[i*2] = te[i*2] - tf[i*2+1];
     315        td[i*2+1] = te[i*2+1] + tf[i*2];
     316      }
     317  }
     318  
     319  __attribute__((optimize("no-tree-vectorize")))
     320  bool cadd90Fcheck(float *td, float *te, float *tf, float *tg, int tas)
     321  {
     322    for (int i = 0; i < tas/2; ++i)
     323      {
     324        float a = te[i*2];
     325        float b = te[i*2+1];
     326        float c = tf[i*2];
     327        float d = tf[i*2+1];
     328        if (td[i*2] != a-d || td[i*2+1] != b+c)
     329          return false;
     330      }
     331    return true;
     332  }
     333  
     334  void cadd90D(double *td, double *te, double *tf, double *tg, int tas)
     335  {
     336    int array_size = tas/2;
     337    for (int i = 0; i < array_size; i++)
     338      {
     339        td[i*2] = te[i*2] - tf[i*2+1];
     340        td[i*2+1] = te[i*2+1] + tf[i*2];
     341      }
     342  }
     343  
     344  __attribute__((optimize("no-tree-vectorize")))
     345  bool cadd90Dcheck(double *td, double *te, double *tf, double *tg, int tas)
     346  {
     347    for (int i = 0; i < tas/2; ++i)
     348      {
     349        double a = te[i*2];
     350        double b = te[i*2+1];
     351        double c = tf[i*2];
     352        double d = tf[i*2+1];
     353        if (td[i*2] != a-d || td[i*2+1] != b+c)
     354          return false;
     355      }
     356    return true;
     357  }
     358  
     359  // cadd270
     360  
     361  void cadd270F(float *td, float *te, float *tf, float *tg, int tas)
     362  {
     363    int array_size = tas/2;
     364    for (int i = 0; i < array_size; i++)
     365      {
     366        td[i*2] = te[i*2] + tf[i*2+1];
     367        td[i*2+1] = te[i*2+1] - tf[i*2];
     368      }
     369  }
     370  
     371  __attribute__((optimize("no-tree-vectorize")))
     372  bool cadd270Fcheck(float *td, float *te, float *tf, float *tg, int tas)
     373  {
     374    for (int i = 0; i < tas/2; ++i)
     375      {
     376        float a = te[i*2];
     377        float b = te[i*2+1];
     378        float c = tf[i*2];
     379        float d = tf[i*2+1];
     380        if (td[i*2] != a+d || td[i*2+1] != b-c)
     381          return false;
     382      }
     383    return true;
     384  }
     385  
     386  void cadd270D(double *td, double *te, double *tf, double *tg, int tas)
     387  {
     388    int array_size = tas/2;
     389    for (int i = 0; i < array_size; i++)
     390      {
     391        td[i*2] = te[i*2] + tf[i*2+1];
     392        td[i*2+1] = te[i*2+1] - tf[i*2];
     393      }
     394  }
     395  
     396  __attribute__((optimize("no-tree-vectorize")))
     397  bool cadd270Dcheck(double *td, double *te, double *tf, double *tg, int tas)
     398  {
     399    for (int i = 0; i < tas/2; ++i)
     400      {
     401        double a = te[i*2];
     402        double b = te[i*2+1];
     403        double c = tf[i*2];
     404        double d = tf[i*2+1];
     405        if (td[i*2] != a+d || td[i*2+1] != b-c)
     406          return false;
     407      }
     408    return true;
     409  }
     410  
     411  
     412  // cmla
     413  
     414  void cmlaF(float *td, float *te, float *tf, float *tg, int tas)
     415  {
     416    typedef _Complex float complexT;
     417    int array_size = tas/2;
     418    complexT *d = (complexT*)(td);
     419    complexT *e = (complexT*)(te);
     420    complexT *f = (complexT*)(tf);
     421    complexT *g = (complexT*)(tg);
     422    for (int i = 0; i < array_size; i++)
     423      {
     424        d[i] = e[i] * f[i] + g[i];
     425      }
     426  }
     427  
     428  __attribute__((optimize("no-tree-vectorize")))
     429  bool cmlaFcheck(float *td, float *te, float *tf, float *tg, int tas)
     430  {
     431    for (int i = 0; i < tas/2; ++i)
     432      {
     433        float a = te[i*2];
     434        float b = te[i*2+1];
     435        float c = tf[i*2];
     436        float d = tf[i*2+1];
     437        float e = tg[i*2];
     438        float f = tg[i*2+1];
     439        if (td[i*2] != a*c-b*d+e || td[i*2+1] != a*d+b*c+f)
     440          return false;
     441      }
     442    return true;
     443  }
     444  
     445  void cmlaD(double *td, double *te, double *tf, double *tg, int tas)
     446  {
     447    typedef _Complex double complexT;
     448    int array_size = tas/2;
     449    complexT *d = (complexT*)(td);
     450    complexT *e = (complexT*)(te);
     451    complexT *f = (complexT*)(tf);
     452    complexT *g = (complexT*)(tg);
     453    for (int i = 0; i < array_size; i++)
     454      {
     455        d[i] = e[i] * f[i] + g[i];
     456      }
     457  }
     458  
     459  __attribute__((optimize("no-tree-vectorize")))
     460  bool cmlaDcheck(double *td, double *te, double *tf, double *tg, int tas)
     461  {
     462    for (int i = 0; i < tas/2; ++i)
     463      {
     464        double a = te[i*2];
     465        double b = te[i*2+1];
     466        double c = tf[i*2];
     467        double d = tf[i*2+1];
     468        double e = tg[i*2];
     469        double f = tg[i*2+1];
     470        if (td[i*2] != a*c-b*d+e || td[i*2+1] != a*d+b*c+f)
     471          return false;
     472      }
     473    return true;
     474  }
     475  
     476  
     477  // cmls
     478  
     479  void cmlsF(float *td, float *te, float *tf, float *tg, int tas)
     480  {
     481    typedef _Complex float complexT;
     482    int array_size = tas/2;
     483    complexT *d = (complexT*)(td);
     484    complexT *e = (complexT*)(te);
     485    complexT *f = (complexT*)(tf);
     486    complexT *g = (complexT*)(tg);
     487    for (int i = 0; i < array_size; i++)
     488      {
     489        d[i] = e[i] * f[i] - g[i];
     490      }
     491  }
     492  
     493  __attribute__((optimize("no-tree-vectorize")))
     494  bool cmlsFcheck(float *td, float *te, float *tf, float *tg, int tas)
     495  {
     496    for (int i = 0; i < tas/2; ++i)
     497      {
     498        float a = te[i*2];
     499        float b = te[i*2+1];
     500        float c = tf[i*2];
     501        float d = tf[i*2+1];
     502        float e = tg[i*2];
     503        float f = tg[i*2+1];
     504        if (td[i*2] != a*c-b*d-e || td[i*2+1] != a*d+b*c-f)
     505          return false;
     506      }
     507    return true;
     508  }
     509  
     510  void cmlsD(double *td, double *te, double *tf, double *tg, int tas)
     511  {
     512    typedef _Complex double complexT;
     513    int array_size = tas/2;
     514    complexT *d = (complexT*)(td);
     515    complexT *e = (complexT*)(te);
     516    complexT *f = (complexT*)(tf);
     517    complexT *g = (complexT*)(tg);
     518    for (int i = 0; i < array_size; i++)
     519      {
     520        d[i] = e[i] * f[i] - g[i];
     521      }
     522  }
     523  
     524  __attribute__((optimize("no-tree-vectorize")))
     525  bool cmlsDcheck(double *td, double *te, double *tf, double *tg, int tas)
     526  {
     527    for (int i = 0; i < tas/2; ++i)
     528      {
     529        double a = te[i*2];
     530        double b = te[i*2+1];
     531        double c = tf[i*2];
     532        double d = tf[i*2+1];
     533        double e = tg[i*2];
     534        double f = tg[i*2+1];
     535        if (td[i*2] != a*c-b*d-e || td[i*2+1] != a*d+b*c-f)
     536          return false;
     537      }
     538    return true;
     539  }
     540  
     541  
// Function-pointer types shared by every operation in this file.
// runF/runD: kernels that write their result into td from te, tf and tg.
// checkF/checkD: reference checkers that return true when td holds the
// expected result.  `tas` is the number of scalar elements in each buffer.
typedef void(*runF)(float *td, float *te, float *tf, float *tg, int tas);
typedef void(*runD)(double *td, double *te, double *tf, double *tg, int tas);
typedef bool(*checkF)(float *td, float *te, float *tf, float *tg, int tas);
typedef bool(*checkD)(double *td, double *te, double *tf, double *tg, int tas);
     546  
// One operation under test: its float and double kernels together with the
// matching float and double reference checkers.
typedef struct
{
  runF rF;    // single-precision kernel
  runD rD;    // double-precision kernel
  checkF cF;  // checker for the single-precision kernel's output
  checkD cD;  // checker for the double-precision kernel's output
} operation;
     554  
// Table of every operation exercised by main(); each row is
// {float kernel, double kernel, float checker, double checker}.
operation ops[] = {
  {cmulF, cmulD, cmulFcheck, cmulDcheck},
  {cmul_conjF, cmul_conjD, cmul_conjFcheck, cmul_conjDcheck},
  {addsubF, addsubD, addsubFcheck, addsubDcheck},
  {fmaddsubF, fmaddsubD, fmaddsubFcheck, fmaddsubDcheck},
  {fmsubaddF, fmsubaddD, fmsubaddFcheck, fmsubaddDcheck},
  {cadd90F, cadd90D, cadd90Fcheck, cadd90Dcheck},
  {cadd270F, cadd270D, cadd270Fcheck, cadd270Dcheck},
  {cmlaF, cmlaD, cmlaFcheck, cmlaDcheck},
  {cmlsF, cmlsD, cmlsFcheck, cmlsDcheck}
};
     566  
     567  void testF(operation* op)
     568  {
     569    float* td;
     570    float* te;
     571    float* tf;
     572    float* tg;
     573    int array_size = COUNT;
     574    td = (float*)malloc(sizeof(float)*array_size);
     575    te = (float*)malloc(sizeof(float)*array_size);
     576    tf = (float*)malloc(sizeof(float)*array_size);
     577    tg = (float*)malloc(sizeof(float)*array_size);
     578    float* dd = td;
     579    float* ee = te;
     580    float* ff = tf;
     581    float* gg = tg;
     582    for (int i = 0; i < COUNT; ++i)
     583      {
     584        te[i] = (float)(rand() % MAX);
     585        tf[i] = (float)(rand() % MAX);
     586        tg[i] = (float)(rand() % MAX);
     587      }
     588    op->rF(td, te, tf, tg, COUNT);
     589    if (!op->cF(td, te, tf, tg, COUNT))
     590      abort();
     591  }
     592  
     593  void testD(operation* op)
     594  {
     595    double* td;
     596    double* te;
     597    double* tf;
     598    double* tg;
     599    int array_size = COUNT;
     600    td = (double*)malloc(sizeof(double)*array_size);
     601    te = (double*)malloc(sizeof(double)*array_size);
     602    tf = (double*)malloc(sizeof(double)*array_size);
     603    tg = (double*)malloc(sizeof(double)*array_size);
     604    double* dd = td;
     605    double* ee = te;
     606    double* ff = tf;
     607    double* gg = tg;
     608    for (int i = 0; i < COUNT; ++i)
     609      {
     610        te[i] = (double)(rand() % MAX);
     611        tf[i] = (double)(rand() % MAX);
     612        tg[i] = (double)(rand() % MAX);
     613      }
     614    op->rD(td, te, tf, tg, COUNT);
     615    if (!op->cD(td, te, tf, tg, COUNT))
     616      abort();
     617  }
     618  
     619  int main()
     620  {
     621     for (int i = 0; i < 9; ++i)
     622      {
     623        testF(&ops[i]);
     624        testD(&ops[i]);
     625      }
     626  }
     627