(root)/
Python-3.11.7/
Modules/
_blake2/
impl/
blake2b.c
       1  /*
       2     BLAKE2 reference source code package - optimized C implementations
       3  
       4     Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
       5  
       6     To the extent possible under law, the author(s) have dedicated all copyright
       7     and related and neighboring rights to this software to the public domain
       8     worldwide. This software is distributed without any warranty.
       9  
      10     You should have received a copy of the CC0 Public Domain Dedication along with
      11     this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
      12  */
      13  
      14  #include <stdint.h>
      15  #include <string.h>
      16  #include <stdio.h>
      17  
      18  #include "blake2.h"
      19  #include "blake2-impl.h"
      20  
      21  #include "blake2-config.h"
      22  
      23  #if defined(_MSC_VER)
      24  #include <intrin.h>
      25  #endif
      26  
      27  #if defined(HAVE_SSE2)
      28  #include <emmintrin.h>
      29  // MSVC only defines  _mm_set_epi64x for x86_64...
      30  #if defined(_MSC_VER) && !defined(_M_X64)
      31  static inline __m128i _mm_set_epi64x( const uint64_t u1, const uint64_t u0 )
      32  {
      33    return _mm_set_epi32( u1 >> 32, u1, u0 >> 32, u0 );
      34  }
      35  #endif
      36  #endif
      37  
      38  #if defined(HAVE_SSSE3)
      39  #include <tmmintrin.h>
      40  #endif
      41  #if defined(HAVE_SSE4_1)
      42  #include <smmintrin.h>
      43  #endif
      44  #if defined(HAVE_AVX)
      45  #include <immintrin.h>
      46  #endif
      47  #if defined(HAVE_XOP) && !defined(_MSC_VER)
      48  #include <x86intrin.h>
      49  #endif
      50  
      51  
      52  
      53  #include "blake2b-round.h"
      54  
/* BLAKE2b initialization vector: eight 64-bit words.
   In this file it is XORed byte-wise with the parameter block to seed the
   chaining value (blake2b_init_param), copied into S->h by blake2b_init0,
   and loaded into rows 3 and 4 of the working state by blake2b_compress. */
static const uint64_t blake2b_IV[8] =
{
  0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
  0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
  0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
  0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
};
      62  
/* Message word permutation table: 12 rows (one per round) of 16 indices.
   Rows 10 and 11 repeat rows 0 and 1.
   NOTE(review): nothing in the visible part of this file references this
   table directly; the ROUND macro used by blake2b_compress comes from
   blake2b-round.h -- confirm whether the table is still needed here. */
static const uint8_t blake2b_sigma[12][16] =
{
  {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 } ,
  { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 } ,
  { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 } ,
  {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 } ,
  {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 } ,
  {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 } ,
  { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 } ,
  { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 } ,
  {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 } ,
  { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13 , 0 } ,
  {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 } ,
  { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 }
};
      78  
      79  
      80  /* Some helper functions, not necessarily useful */
      81  static inline int blake2b_set_lastnode( blake2b_state *S )
      82  {
      83    S->f[1] = ~0ULL;
      84    return 0;
      85  }
      86  
      87  static inline int blake2b_clear_lastnode( blake2b_state *S )
      88  {
      89    S->f[1] = 0ULL;
      90    return 0;
      91  }
      92  
      93  static inline int blake2b_set_lastblock( blake2b_state *S )
      94  {
      95    if( S->last_node ) blake2b_set_lastnode( S );
      96  
      97    S->f[0] = ~0ULL;
      98    return 0;
      99  }
     100  
     101  static inline int blake2b_clear_lastblock( blake2b_state *S )
     102  {
     103    if( S->last_node ) blake2b_clear_lastnode( S );
     104  
     105    S->f[0] = 0ULL;
     106    return 0;
     107  }
     108  
     109  
     110  static inline int blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
     111  {
     112  #if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__))
     113    // ADD/ADC chain
     114    __uint128_t t = ( ( __uint128_t )S->t[1] << 64 ) | S->t[0];
     115    t += inc;
     116    S->t[0] = ( uint64_t )( t >>  0 );
     117    S->t[1] = ( uint64_t )( t >> 64 );
     118  #else
     119    S->t[0] += inc;
     120    S->t[1] += ( S->t[0] < inc );
     121  #endif
     122    return 0;
     123  }
     124  
     125  
     126  // Parameter-related functions
     127  static inline int blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length )
     128  {
     129    P->digest_length = digest_length;
     130    return 0;
     131  }
     132  
     133  static inline int blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout )
     134  {
     135    P->fanout = fanout;
     136    return 0;
     137  }
     138  
     139  static inline int blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth )
     140  {
     141    P->depth = depth;
     142    return 0;
     143  }
     144  
     145  static inline int blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length )
     146  {
     147    P->leaf_length = leaf_length;
     148    return 0;
     149  }
     150  
     151  static inline int blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset )
     152  {
     153    P->node_offset = node_offset;
     154    return 0;
     155  }
     156  
     157  static inline int blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth )
     158  {
     159    P->node_depth = node_depth;
     160    return 0;
     161  }
     162  
     163  static inline int blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length )
     164  {
     165    P->inner_length = inner_length;
     166    return 0;
     167  }
     168  
     169  static inline int blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] )
     170  {
     171    memcpy( P->salt, salt, BLAKE2B_SALTBYTES );
     172    return 0;
     173  }
     174  
     175  static inline int blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] )
     176  {
     177    memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES );
     178    return 0;
     179  }
     180  
     181  static inline int blake2b_init0( blake2b_state *S )
     182  {
     183    memset( S, 0, sizeof( blake2b_state ) );
     184  
     185    for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i];
     186  
     187    return 0;
     188  }
     189  
     190  
     191  
/* Prototypes for the externally visible functions defined further down in
   this file. They are wrapped in extern "C" so the names keep C linkage when
   the file is compiled as C++. */
#if defined(__cplusplus)
extern "C" {
#endif
  int blake2b_init( blake2b_state *S, size_t outlen );
  int blake2b_init_param( blake2b_state *S, const blake2b_param *P );
  int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
  int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen );
  int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen );
  int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
#if defined(__cplusplus)
}
#endif
     204  
     205  /* init xors IV with input parameter block */
     206  int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
     207  {
     208    uint8_t *p, *h, *v;
     209    //blake2b_init0( S );
     210    v = ( uint8_t * )( blake2b_IV );
     211    h = ( uint8_t * )( S->h );
     212    p = ( uint8_t * )( P );
     213    /* IV XOR ParamBlock */
     214    memset( S, 0, sizeof( blake2b_state ) );
     215  
     216    for( int i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];
     217  
     218    S->outlen = P->digest_length;
     219    return 0;
     220  }
     221  
     222  
     223  /* Some sort of default parameter block initialization, for sequential blake2b */
     224  
     225  int blake2b_init( blake2b_state *S, size_t outlen )
     226  {
     227    if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
     228  
     229    const blake2b_param P =
     230    {
     231      ( uint8_t ) outlen,
     232      0,
     233      1,
     234      1,
     235      0,
     236      0,
     237      0,
     238      0,
     239      {0},
     240      {0},
     241      {0}
     242    };
     243    return blake2b_init_param( S, &P );
     244  }
     245  
     246  int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen )
     247  {
     248    if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
     249  
     250    if ( ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1;
     251  
     252    const blake2b_param P =
     253    {
     254      ( uint8_t ) outlen,
     255      ( uint8_t ) keylen,
     256      1,
     257      1,
     258      0,
     259      0,
     260      0,
     261      0,
     262      {0},
     263      {0},
     264      {0}
     265    };
     266  
     267    if( blake2b_init_param( S, &P ) < 0 )
     268      return 0;
     269  
     270    {
     271      uint8_t block[BLAKE2B_BLOCKBYTES];
     272      memset( block, 0, BLAKE2B_BLOCKBYTES );
     273      memcpy( block, key, keylen );
     274      blake2b_update( S, block, BLAKE2B_BLOCKBYTES );
     275      secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
     276    }
     277    return 0;
     278  }
     279  
     280  static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
     281  {
     282    __m128i row1l, row1h;
     283    __m128i row2l, row2h;
     284    __m128i row3l, row3h;
     285    __m128i row4l, row4h;
     286    __m128i b0, b1;
     287    __m128i t0, t1;
     288  #if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
     289    const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 );
     290    const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 );
     291  #endif
     292  #if defined(HAVE_SSE4_1)
     293    const __m128i m0 = LOADU( block + 00 );
     294    const __m128i m1 = LOADU( block + 16 );
     295    const __m128i m2 = LOADU( block + 32 );
     296    const __m128i m3 = LOADU( block + 48 );
     297    const __m128i m4 = LOADU( block + 64 );
     298    const __m128i m5 = LOADU( block + 80 );
     299    const __m128i m6 = LOADU( block + 96 );
     300    const __m128i m7 = LOADU( block + 112 );
     301  #else
     302    const uint64_t  m0 = ( ( uint64_t * )block )[ 0];
     303    const uint64_t  m1 = ( ( uint64_t * )block )[ 1];
     304    const uint64_t  m2 = ( ( uint64_t * )block )[ 2];
     305    const uint64_t  m3 = ( ( uint64_t * )block )[ 3];
     306    const uint64_t  m4 = ( ( uint64_t * )block )[ 4];
     307    const uint64_t  m5 = ( ( uint64_t * )block )[ 5];
     308    const uint64_t  m6 = ( ( uint64_t * )block )[ 6];
     309    const uint64_t  m7 = ( ( uint64_t * )block )[ 7];
     310    const uint64_t  m8 = ( ( uint64_t * )block )[ 8];
     311    const uint64_t  m9 = ( ( uint64_t * )block )[ 9];
     312    const uint64_t m10 = ( ( uint64_t * )block )[10];
     313    const uint64_t m11 = ( ( uint64_t * )block )[11];
     314    const uint64_t m12 = ( ( uint64_t * )block )[12];
     315    const uint64_t m13 = ( ( uint64_t * )block )[13];
     316    const uint64_t m14 = ( ( uint64_t * )block )[14];
     317    const uint64_t m15 = ( ( uint64_t * )block )[15];
     318  #endif
     319    row1l = LOADU( &S->h[0] );
     320    row1h = LOADU( &S->h[2] );
     321    row2l = LOADU( &S->h[4] );
     322    row2h = LOADU( &S->h[6] );
     323    row3l = LOADU( &blake2b_IV[0] );
     324    row3h = LOADU( &blake2b_IV[2] );
     325    row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) );
     326    row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) );
     327    ROUND( 0 );
     328    ROUND( 1 );
     329    ROUND( 2 );
     330    ROUND( 3 );
     331    ROUND( 4 );
     332    ROUND( 5 );
     333    ROUND( 6 );
     334    ROUND( 7 );
     335    ROUND( 8 );
     336    ROUND( 9 );
     337    ROUND( 10 );
     338    ROUND( 11 );
     339    row1l = _mm_xor_si128( row3l, row1l );
     340    row1h = _mm_xor_si128( row3h, row1h );
     341    STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) );
     342    STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) );
     343    row2l = _mm_xor_si128( row4l, row2l );
     344    row2h = _mm_xor_si128( row4h, row2h );
     345    STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) );
     346    STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) );
     347    return 0;
     348  }
     349  
     350  
     351  int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen )
     352  {
     353    while( inlen > 0 )
     354    {
     355      uint32_t left = S->buflen;
     356      uint32_t fill = 2 * BLAKE2B_BLOCKBYTES - left;
     357  
     358      if( inlen > fill )
     359      {
     360        memcpy( S->buf + left, in, fill ); // Fill buffer
     361        S->buflen += fill;
     362        blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
     363        blake2b_compress( S, S->buf ); // Compress
     364        memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); // Shift buffer left
     365        S->buflen -= BLAKE2B_BLOCKBYTES;
     366        in += fill;
     367        inlen -= fill;
     368      }
     369      else // inlen <= fill
     370      {
     371        memcpy( S->buf + left, in, inlen );
     372        S->buflen += ( uint32_t ) inlen; // Be lazy, do not compress
     373        in += inlen;
     374        inlen -= inlen;
     375      }
     376    }
     377  
     378    return 0;
     379  }
     380  
     381  
     382  int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen )
     383  {
     384    if(S->outlen != outlen) return -1;
     385  
     386    if( S->buflen > BLAKE2B_BLOCKBYTES )
     387    {
     388      blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
     389      blake2b_compress( S, S->buf );
     390      S->buflen -= BLAKE2B_BLOCKBYTES;
     391      memmove( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen );
     392    }
     393  
     394    blake2b_increment_counter( S, S->buflen );
     395    blake2b_set_lastblock( S );
     396    memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
     397    blake2b_compress( S, S->buf );
     398    memcpy( out, &S->h[0], outlen );
     399    return 0;
     400  }
     401  
     402  
     403  int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
     404  {
     405    blake2b_state S[1];
     406  
     407    /* Verify parameters */
     408    if ( NULL == in && inlen > 0 ) return -1;
     409  
     410    if ( NULL == out ) return -1;
     411  
     412    if( NULL == key && keylen > 0 ) return -1;
     413  
     414    if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
     415  
     416    if( keylen > BLAKE2B_KEYBYTES ) return -1;
     417  
     418    if( keylen )
     419    {
     420      if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1;
     421    }
     422    else
     423    {
     424      if( blake2b_init( S, outlen ) < 0 ) return -1;
     425    }
     426  
     427    if( blake2b_update( S, ( uint8_t * )in, inlen ) < 0) return -1;
     428    return blake2b_final( S, out, outlen );
     429  }
     430  
     431  #if defined(SUPERCOP)
     432  int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
     433  {
     434    return blake2b( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 );
     435  }
     436  #endif