1  #include <stdio.h>
       2  #include <complex.h>
       3  
       4  #ifndef PREF
       5  #define PREF c
       6  #endif
       7  
       8  #define FX(N,P) P ## _ ## N
       9  #define MK(N,P) FX(P,N)
      10  
      11  #define N 32
      12  #define TYPE double
      13  
      14  // ------ FMA
      15  
      16  // Complex FMA instructions rotating the result
      17  
      18  __attribute__((noinline,noipa))
      19  void MK(fma0, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
      20  {
      21    for (int i=0; i < N; i++)
      22        c[i] += a[i] * b[i];
      23  }
      24  
      25  __attribute__((noinline,noipa))
      26  void MK(fma90, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
      27  {
      28    for (int i=0; i < N; i++)
      29        c[i] += a[i] * b[i] * I;
      30  }
      31  
      32  __attribute__((noinline,noipa))
      33  void MK(fma180, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
      34  {
      35    for (int i=0; i < N; i++)
      36        c[i] += a[i] * b[i] * I * I;
      37  }
      38  
      39  __attribute__((noinline,noipa))
      40  void MK(fma270, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
      41  {
      42    for (int i=0; i < N; i++)
      43        c[i] += a[i] * b[i] * I * I * I;
      44  }
      45  
      46  // Complex FMA instructions rotating the second parameter.
      47  
      48  
      49  __attribute__((noinline,noipa))
      50  void MK(fma0_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
      51  {
      52    for (int i=0; i < N; i++)
      53        c[i] += a[i] * b[i];
      54  }
      55  
      56  __attribute__((noinline,noipa))
      57  void MK(fma90_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
      58  {
      59    for (int i=0; i < N; i++)
      60        c[i] += a[i] * (b[i] * I);
      61  }
      62  
      63  __attribute__((noinline,noipa))
      64  void MK(fma180_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
      65  {
      66    for (int i=0; i < N; i++)
      67        c[i] += a[i] * (b[i] * I * I);
      68  }
      69  
      70  __attribute__((noinline,noipa))
      71  void MK(fma270_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
      72  {
      73    for (int i=0; i < N; i++)
      74        c[i] += a[i] * (b[i] * I * I * I);
      75  }
      76  
// Complex FMA instructions with conjugated values.
      78  
      79  
      80  __attribute__((noinline,noipa))
      81  void MK(fma_conj_first, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
      82  {
      83    for (int i=0; i < N; i++)
      84        c[i] += conj (a[i]) * b[i];
      85  }
      86  
      87  __attribute__((noinline,noipa))
      88  void MK(fma_conj_second, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
      89  {
      90    for (int i=0; i < N; i++)
      91        c[i] += a[i] * conj (b[i]);
      92  }
      93  
      94  __attribute__((noinline,noipa))
      95  void MK(fma_conj_both, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
      96  {
      97    for (int i=0; i < N; i++)
      98        c[i] += conj (a[i]) * conj (b[i]);
      99  }
     100  
     101  // ----- FMS
     102  
     103  // Complex FMS instructions rotating the result
     104  
     105  __attribute__((noinline,noipa))
     106  void MK(fms0, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     107  {
     108    for (int i=0; i < N; i++)
     109        c[i] -= a[i] * b[i];
     110  }
     111  
     112  __attribute__((noinline,noipa))
     113  void MK(fms90, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     114  {
     115    for (int i=0; i < N; i++)
     116        c[i] -= a[i] * b[i] * I;
     117  }
     118  
     119  __attribute__((noinline,noipa))
     120  void MK(fms180, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     121  {
     122    for (int i=0; i < N; i++)
     123        c[i] -= a[i] * b[i] * I * I;
     124  }
     125  
     126  __attribute__((noinline,noipa))
     127  void MK(fms270, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     128  {
     129    for (int i=0; i < N; i++)
     130        c[i] -= a[i] * b[i] * I * I * I;
     131  }
     132  
     133  // Complex FMS instructions rotating the second parameter.
     134  
     135  __attribute__((noinline,noipa))
     136  void MK(fms0_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     137  {
     138    for (int i=0; i < N; i++)
     139        c[i] -= a[i] * b[i];
     140  }
     141  
     142  __attribute__((noinline,noipa))
     143  void MK(fms90_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     144  {
     145    for (int i=0; i < N; i++)
     146        c[i] -= a[i] * (b[i] * I);
     147  }
     148  
     149  __attribute__((noinline,noipa))
     150  void MK(fms180_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     151  {
     152    for (int i=0; i < N; i++)
     153        c[i] -= a[i] * (b[i] * I * I);
     154  }
     155  
     156  __attribute__((noinline,noipa))
     157  void MK(fms270_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     158  {
     159    for (int i=0; i < N; i++)
     160        c[i] -= a[i] * (b[i] * I * I * I);
     161  }
     162  
// Complex FMS instructions with conjugated values.
     164  
     165  __attribute__((noinline,noipa))
     166  void MK(fms_conj_first, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     167  {
     168    for (int i=0; i < N; i++)
     169        c[i] -= conj (a[i]) * b[i];
     170  }
     171  
     172  __attribute__((noinline,noipa))
     173  void MK(fms_conj_second, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     174  {
     175    for (int i=0; i < N; i++)
     176        c[i] -= a[i] * conj (b[i]);
     177  }
     178  
     179  __attribute__((noinline,noipa))
     180  void MK(fms_conj_both, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     181  {
     182    for (int i=0; i < N; i++)
     183        c[i] -= conj (a[i]) * conj (b[i]);
     184  }
     185  
     186  
     187  // ----- MUL
     188  
     189  // Complex MUL instructions rotating the result
     190  
     191  __attribute__((noinline,noipa))
     192  void MK(mul0, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     193  {
     194    for (int i=0; i < N; i++)
     195        c[i] = a[i] * b[i];
     196  }
     197  
     198  __attribute__((noinline,noipa))
     199  void MK(mul90, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     200  {
     201    for (int i=0; i < N; i++)
     202        c[i] = a[i] * b[i] * I;
     203  }
     204  
     205  __attribute__((noinline,noipa))
     206  void MK(mul180, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     207  {
     208    for (int i=0; i < N; i++)
     209        c[i] = a[i] * b[i] * I * I;
     210  }
     211  
     212  __attribute__((noinline,noipa))
     213  void MK(mul270, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     214  {
     215    for (int i=0; i < N; i++)
     216        c[i] = a[i] * b[i] * I * I * I;
     217  }
     218  
     219  // Complex MUL instructions rotating the second parameter.
     220  
     221  __attribute__((noinline,noipa))
     222  void MK(mul0_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     223  {
     224    for (int i=0; i < N; i++)
     225        c[i] = a[i] * b[i];
     226  }
     227  
     228  __attribute__((noinline,noipa))
     229  void MK(mul90_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     230  {
     231    for (int i=0; i < N; i++)
     232        c[i] = a[i] * (b[i] * I);
     233  }
     234  
     235  __attribute__((noinline,noipa))
     236  void MK(mul180_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     237  {
     238    for (int i=0; i < N; i++)
     239        c[i] = a[i] * (b[i] * I * I);
     240  }
     241  
     242  __attribute__((noinline,noipa))
     243  void MK(mul270_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     244  {
     245    for (int i=0; i < N; i++)
     246        c[i] = a[i] * (b[i] * I * I * I);
     247  }
     248  
// Complex MUL instructions with conjugated values.
     250  
     251  __attribute__((noinline,noipa))
     252  void MK(mul_conj_first, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     253  {
     254    for (int i=0; i < N; i++)
     255        c[i] = conj (a[i]) * b[i];
     256  }
     257  
     258  __attribute__((noinline,noipa))
     259  void MK(mul_conj_second, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     260  {
     261    for (int i=0; i < N; i++)
     262        c[i] = a[i] * conj (b[i]);
     263  }
     264  
     265  __attribute__((noinline,noipa))
     266  void MK(mul_conj_both, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     267  {
     268    for (int i=0; i < N; i++)
     269        c[i] = conj (a[i]) * conj (b[i]);
     270  }
     271  
     272  
     273  // ----- ADD
     274  
     275  // Complex ADD instructions rotating the result
     276  
     277  __attribute__((noinline,noipa))
     278  void MK(add0, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     279  {
     280    for (int i=0; i < N; i++)
     281        c[i] = a[i] + b[i];
     282  }
     283  
     284  __attribute__((noinline,noipa))
     285  void MK(add90, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     286  {
     287    for (int i=0; i < N; i++)
     288        c[i] = (a[i] + b[i]) * I;
     289  }
     290  
     291  __attribute__((noinline,noipa))
     292  void MK(add180, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     293  {
     294    for (int i=0; i < N; i++)
     295        c[i] = (a[i] + b[i]) * I * I;
     296  }
     297  
     298  __attribute__((noinline,noipa))
     299  void MK(add270, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     300  {
     301    for (int i=0; i < N; i++)
     302        c[i] = (a[i] + b[i]) * I * I * I;
     303  }
     304  
     305  // Complex ADD instructions rotating the second parameter.
     306  
     307  __attribute__((noinline,noipa))
     308  void MK(add0_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     309  {
     310    for (int i=0; i < N; i++)
     311        c[i] = a[i] + b[i];
     312  }
     313  
     314  __attribute__((noinline,noipa))
     315  void MK(add90_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     316  {
     317    for (int i=0; i < N; i++)
     318        c[i] = a[i] + (b[i] * I);
     319  }
     320  
     321  __attribute__((noinline,noipa))
     322  void MK(add180_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     323  {
     324    for (int i=0; i < N; i++)
     325        c[i] = a[i] + (b[i] * I * I);
     326  }
     327  
     328  __attribute__((noinline,noipa))
     329  void MK(add270_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     330  {
     331    for (int i=0; i < N; i++)
     332        c[i] = a[i] + (b[i] * I * I * I);
     333  }
     334  
// Complex ADD instructions with conjugated values.
     336  
     337  __attribute__((noinline,noipa))
     338  void MK(add_conj_first, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     339  {
     340    for (int i=0; i < N; i++)
     341        c[i] = conj (a[i]) + b[i];
     342  }
     343  
     344  __attribute__((noinline,noipa))
     345  void MK(add_conj_second, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     346  {
     347    for (int i=0; i < N; i++)
     348        c[i] = a[i] + conj (b[i]);
     349  }
     350  
     351  __attribute__((noinline,noipa))
     352  void MK(add_conj_both, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
     353  {
     354    for (int i=0; i < N; i++)
     355        c[i] = conj (a[i]) + conj (b[i]);
     356  }
     357  
     358