(root)/
Python-3.11.7/
Modules/
_blake2/
impl/
blake2b-load-sse41.h
       1  /*
       2     BLAKE2 reference source code package - optimized C implementations
       3  
       4     Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
       5  
       6     To the extent possible under law, the author(s) have dedicated all copyright
       7     and related and neighboring rights to this software to the public domain
       8     worldwide. This software is distributed without any warranty.
       9  
      10     You should have received a copy of the CC0 Public Domain Dedication along with
      11     this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
      12  */
      13  #pragma once
      14  #ifndef __BLAKE2B_LOAD_SSE41_H__
      15  #define __BLAKE2B_LOAD_SSE41_H__
      16  
      17  #define LOAD_MSG_0_1(b0, b1) \
      18  do \
      19  { \
      20  b0 = _mm_unpacklo_epi64(m0, m1); \
      21  b1 = _mm_unpacklo_epi64(m2, m3); \
      22  } while(0)
      23  
      24  
      25  #define LOAD_MSG_0_2(b0, b1) \
      26  do \
      27  { \
      28  b0 = _mm_unpackhi_epi64(m0, m1); \
      29  b1 = _mm_unpackhi_epi64(m2, m3); \
      30  } while(0)
      31  
      32  
      33  #define LOAD_MSG_0_3(b0, b1) \
      34  do \
      35  { \
      36  b0 = _mm_unpacklo_epi64(m4, m5); \
      37  b1 = _mm_unpacklo_epi64(m6, m7); \
      38  } while(0)
      39  
      40  
      41  #define LOAD_MSG_0_4(b0, b1) \
      42  do \
      43  { \
      44  b0 = _mm_unpackhi_epi64(m4, m5); \
      45  b1 = _mm_unpackhi_epi64(m6, m7); \
      46  } while(0)
      47  
      48  
      49  #define LOAD_MSG_1_1(b0, b1) \
      50  do \
      51  { \
      52  b0 = _mm_unpacklo_epi64(m7, m2); \
      53  b1 = _mm_unpackhi_epi64(m4, m6); \
      54  } while(0)
      55  
      56  
      57  #define LOAD_MSG_1_2(b0, b1) \
      58  do \
      59  { \
      60  b0 = _mm_unpacklo_epi64(m5, m4); \
      61  b1 = _mm_alignr_epi8(m3, m7, 8); \
      62  } while(0)
      63  
      64  
      65  #define LOAD_MSG_1_3(b0, b1) \
      66  do \
      67  { \
      68  b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
      69  b1 = _mm_unpackhi_epi64(m5, m2); \
      70  } while(0)
      71  
      72  
      73  #define LOAD_MSG_1_4(b0, b1) \
      74  do \
      75  { \
      76  b0 = _mm_unpacklo_epi64(m6, m1); \
      77  b1 = _mm_unpackhi_epi64(m3, m1); \
      78  } while(0)
      79  
      80  
      81  #define LOAD_MSG_2_1(b0, b1) \
      82  do \
      83  { \
      84  b0 = _mm_alignr_epi8(m6, m5, 8); \
      85  b1 = _mm_unpackhi_epi64(m2, m7); \
      86  } while(0)
      87  
      88  
      89  #define LOAD_MSG_2_2(b0, b1) \
      90  do \
      91  { \
      92  b0 = _mm_unpacklo_epi64(m4, m0); \
      93  b1 = _mm_blend_epi16(m1, m6, 0xF0); \
      94  } while(0)
      95  
      96  
      97  #define LOAD_MSG_2_3(b0, b1) \
      98  do \
      99  { \
     100  b0 = _mm_blend_epi16(m5, m1, 0xF0); \
     101  b1 = _mm_unpackhi_epi64(m3, m4); \
     102  } while(0)
     103  
     104  
     105  #define LOAD_MSG_2_4(b0, b1) \
     106  do \
     107  { \
     108  b0 = _mm_unpacklo_epi64(m7, m3); \
     109  b1 = _mm_alignr_epi8(m2, m0, 8); \
     110  } while(0)
     111  
     112  
     113  #define LOAD_MSG_3_1(b0, b1) \
     114  do \
     115  { \
     116  b0 = _mm_unpackhi_epi64(m3, m1); \
     117  b1 = _mm_unpackhi_epi64(m6, m5); \
     118  } while(0)
     119  
     120  
     121  #define LOAD_MSG_3_2(b0, b1) \
     122  do \
     123  { \
     124  b0 = _mm_unpackhi_epi64(m4, m0); \
     125  b1 = _mm_unpacklo_epi64(m6, m7); \
     126  } while(0)
     127  
     128  
     129  #define LOAD_MSG_3_3(b0, b1) \
     130  do \
     131  { \
     132  b0 = _mm_blend_epi16(m1, m2, 0xF0); \
     133  b1 = _mm_blend_epi16(m2, m7, 0xF0); \
     134  } while(0)
     135  
     136  
     137  #define LOAD_MSG_3_4(b0, b1) \
     138  do \
     139  { \
     140  b0 = _mm_unpacklo_epi64(m3, m5); \
     141  b1 = _mm_unpacklo_epi64(m0, m4); \
     142  } while(0)
     143  
     144  
     145  #define LOAD_MSG_4_1(b0, b1) \
     146  do \
     147  { \
     148  b0 = _mm_unpackhi_epi64(m4, m2); \
     149  b1 = _mm_unpacklo_epi64(m1, m5); \
     150  } while(0)
     151  
     152  
     153  #define LOAD_MSG_4_2(b0, b1) \
     154  do \
     155  { \
     156  b0 = _mm_blend_epi16(m0, m3, 0xF0); \
     157  b1 = _mm_blend_epi16(m2, m7, 0xF0); \
     158  } while(0)
     159  
     160  
     161  #define LOAD_MSG_4_3(b0, b1) \
     162  do \
     163  { \
     164  b0 = _mm_blend_epi16(m7, m5, 0xF0); \
     165  b1 = _mm_blend_epi16(m3, m1, 0xF0); \
     166  } while(0)
     167  
     168  
     169  #define LOAD_MSG_4_4(b0, b1) \
     170  do \
     171  { \
     172  b0 = _mm_alignr_epi8(m6, m0, 8); \
     173  b1 = _mm_blend_epi16(m4, m6, 0xF0); \
     174  } while(0)
     175  
     176  
     177  #define LOAD_MSG_5_1(b0, b1) \
     178  do \
     179  { \
     180  b0 = _mm_unpacklo_epi64(m1, m3); \
     181  b1 = _mm_unpacklo_epi64(m0, m4); \
     182  } while(0)
     183  
     184  
     185  #define LOAD_MSG_5_2(b0, b1) \
     186  do \
     187  { \
     188  b0 = _mm_unpacklo_epi64(m6, m5); \
     189  b1 = _mm_unpackhi_epi64(m5, m1); \
     190  } while(0)
     191  
     192  
     193  #define LOAD_MSG_5_3(b0, b1) \
     194  do \
     195  { \
     196  b0 = _mm_blend_epi16(m2, m3, 0xF0); \
     197  b1 = _mm_unpackhi_epi64(m7, m0); \
     198  } while(0)
     199  
     200  
     201  #define LOAD_MSG_5_4(b0, b1) \
     202  do \
     203  { \
     204  b0 = _mm_unpackhi_epi64(m6, m2); \
     205  b1 = _mm_blend_epi16(m7, m4, 0xF0); \
     206  } while(0)
     207  
     208  
     209  #define LOAD_MSG_6_1(b0, b1) \
     210  do \
     211  { \
     212  b0 = _mm_blend_epi16(m6, m0, 0xF0); \
     213  b1 = _mm_unpacklo_epi64(m7, m2); \
     214  } while(0)
     215  
     216  
     217  #define LOAD_MSG_6_2(b0, b1) \
     218  do \
     219  { \
     220  b0 = _mm_unpackhi_epi64(m2, m7); \
     221  b1 = _mm_alignr_epi8(m5, m6, 8); \
     222  } while(0)
     223  
     224  
     225  #define LOAD_MSG_6_3(b0, b1) \
     226  do \
     227  { \
     228  b0 = _mm_unpacklo_epi64(m0, m3); \
     229  b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \
     230  } while(0)
     231  
     232  
     233  #define LOAD_MSG_6_4(b0, b1) \
     234  do \
     235  { \
     236  b0 = _mm_unpackhi_epi64(m3, m1); \
     237  b1 = _mm_blend_epi16(m1, m5, 0xF0); \
     238  } while(0)
     239  
     240  
     241  #define LOAD_MSG_7_1(b0, b1) \
     242  do \
     243  { \
     244  b0 = _mm_unpackhi_epi64(m6, m3); \
     245  b1 = _mm_blend_epi16(m6, m1, 0xF0); \
     246  } while(0)
     247  
     248  
     249  #define LOAD_MSG_7_2(b0, b1) \
     250  do \
     251  { \
     252  b0 = _mm_alignr_epi8(m7, m5, 8); \
     253  b1 = _mm_unpackhi_epi64(m0, m4); \
     254  } while(0)
     255  
     256  
     257  #define LOAD_MSG_7_3(b0, b1) \
     258  do \
     259  { \
     260  b0 = _mm_unpackhi_epi64(m2, m7); \
     261  b1 = _mm_unpacklo_epi64(m4, m1); \
     262  } while(0)
     263  
     264  
     265  #define LOAD_MSG_7_4(b0, b1) \
     266  do \
     267  { \
     268  b0 = _mm_unpacklo_epi64(m0, m2); \
     269  b1 = _mm_unpacklo_epi64(m3, m5); \
     270  } while(0)
     271  
     272  
     273  #define LOAD_MSG_8_1(b0, b1) \
     274  do \
     275  { \
     276  b0 = _mm_unpacklo_epi64(m3, m7); \
     277  b1 = _mm_alignr_epi8(m0, m5, 8); \
     278  } while(0)
     279  
     280  
     281  #define LOAD_MSG_8_2(b0, b1) \
     282  do \
     283  { \
     284  b0 = _mm_unpackhi_epi64(m7, m4); \
     285  b1 = _mm_alignr_epi8(m4, m1, 8); \
     286  } while(0)
     287  
     288  
     289  #define LOAD_MSG_8_3(b0, b1) \
     290  do \
     291  { \
     292  b0 = m6; \
     293  b1 = _mm_alignr_epi8(m5, m0, 8); \
     294  } while(0)
     295  
     296  
     297  #define LOAD_MSG_8_4(b0, b1) \
     298  do \
     299  { \
     300  b0 = _mm_blend_epi16(m1, m3, 0xF0); \
     301  b1 = m2; \
     302  } while(0)
     303  
     304  
     305  #define LOAD_MSG_9_1(b0, b1) \
     306  do \
     307  { \
     308  b0 = _mm_unpacklo_epi64(m5, m4); \
     309  b1 = _mm_unpackhi_epi64(m3, m0); \
     310  } while(0)
     311  
     312  
     313  #define LOAD_MSG_9_2(b0, b1) \
     314  do \
     315  { \
     316  b0 = _mm_unpacklo_epi64(m1, m2); \
     317  b1 = _mm_blend_epi16(m3, m2, 0xF0); \
     318  } while(0)
     319  
     320  
     321  #define LOAD_MSG_9_3(b0, b1) \
     322  do \
     323  { \
     324  b0 = _mm_unpackhi_epi64(m7, m4); \
     325  b1 = _mm_unpackhi_epi64(m1, m6); \
     326  } while(0)
     327  
     328  
     329  #define LOAD_MSG_9_4(b0, b1) \
     330  do \
     331  { \
     332  b0 = _mm_alignr_epi8(m7, m5, 8); \
     333  b1 = _mm_unpacklo_epi64(m6, m0); \
     334  } while(0)
     335  
     336  
     337  #define LOAD_MSG_10_1(b0, b1) \
     338  do \
     339  { \
     340  b0 = _mm_unpacklo_epi64(m0, m1); \
     341  b1 = _mm_unpacklo_epi64(m2, m3); \
     342  } while(0)
     343  
     344  
     345  #define LOAD_MSG_10_2(b0, b1) \
     346  do \
     347  { \
     348  b0 = _mm_unpackhi_epi64(m0, m1); \
     349  b1 = _mm_unpackhi_epi64(m2, m3); \
     350  } while(0)
     351  
     352  
     353  #define LOAD_MSG_10_3(b0, b1) \
     354  do \
     355  { \
     356  b0 = _mm_unpacklo_epi64(m4, m5); \
     357  b1 = _mm_unpacklo_epi64(m6, m7); \
     358  } while(0)
     359  
     360  
     361  #define LOAD_MSG_10_4(b0, b1) \
     362  do \
     363  { \
     364  b0 = _mm_unpackhi_epi64(m4, m5); \
     365  b1 = _mm_unpackhi_epi64(m6, m7); \
     366  } while(0)
     367  
     368  
     369  #define LOAD_MSG_11_1(b0, b1) \
     370  do \
     371  { \
     372  b0 = _mm_unpacklo_epi64(m7, m2); \
     373  b1 = _mm_unpackhi_epi64(m4, m6); \
     374  } while(0)
     375  
     376  
     377  #define LOAD_MSG_11_2(b0, b1) \
     378  do \
     379  { \
     380  b0 = _mm_unpacklo_epi64(m5, m4); \
     381  b1 = _mm_alignr_epi8(m3, m7, 8); \
     382  } while(0)
     383  
     384  
     385  #define LOAD_MSG_11_3(b0, b1) \
     386  do \
     387  { \
     388  b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
     389  b1 = _mm_unpackhi_epi64(m5, m2); \
     390  } while(0)
     391  
     392  
     393  #define LOAD_MSG_11_4(b0, b1) \
     394  do \
     395  { \
     396  b0 = _mm_unpacklo_epi64(m6, m1); \
     397  b1 = _mm_unpackhi_epi64(m3, m1); \
     398  } while(0)
     399  
     400  
     401  #endif
     402