| /* |
| * Copyright (C) 2018 Denys Vlasenko |
| * |
| * Licensed under GPLv2, see file LICENSE in this source tree. |
| */ |
| |
| #include "tls.h" |
| |
| typedef uint8_t byte; |
| typedef uint32_t word32; |
| #define XMEMSET memset |
| #define XMEMCPY memcpy |
| |
| /* from wolfssl-3.15.3/wolfcrypt/src/aes.c */ |
| |
| #ifdef UNUSED |
| static ALWAYS_INLINE void FlattenSzInBits(byte* buf, word32 sz) |
| { |
| /* Multiply the sz by 8 */ |
| //bbox: these sizes are never even close to 2^32/8 |
| // word32 szHi = (sz >> (8*sizeof(sz) - 3)); |
| sz <<= 3; |
| |
| /* copy over the words of the sz into the destination buffer */ |
| // buf[0] = (szHi >> 24) & 0xff; |
| // buf[1] = (szHi >> 16) & 0xff; |
| // buf[2] = (szHi >> 8) & 0xff; |
| // buf[3] = szHi & 0xff; |
| *(uint32_t*)(buf + 0) = 0; |
| // buf[4] = (sz >> 24) & 0xff; |
| // buf[5] = (sz >> 16) & 0xff; |
| // buf[6] = (sz >> 8) & 0xff; |
| // buf[7] = sz & 0xff; |
| *(uint32_t*)(buf + 4) = SWAP_BE32(sz); |
| } |
| #endif |
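// Note: the disabled helper above wrote sz*8 (the length in bits) as a
// 64-bit big-endian value. aesgcm_GHASH() below does the same thing inline
// with two 32-bit XORs, since the sizes seen in TLS never need the high word.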
| |
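// RIGHTSHIFTX(x): multiply the 128-bit GHASH field element x by the
// polynomial "x" in GF(2^128), using GCM's bit-reflected representation:
// shift the whole 16-byte value right by one bit (byte 0 is the most
// significant byte) and, if the bit shifted out of x[15] was set, XOR the
// reduction constant 0xE1 into byte 0.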
| static void RIGHTSHIFTX(byte* x) |
| { |
| #define l ((unsigned long*)x) |
| #if 0 |
| |
| // Generic byte-at-a-time algorithm |
| int i; |
| byte carryIn = (x[15] & 0x01) ? 0xE1 : 0; |
| for (i = 0; i < AES_BLOCK_SIZE; i++) { |
| byte carryOut = (x[i] << 7); // zero, or 0x80 |
| x[i] = (x[i] >> 1) ^ carryIn; |
| carryIn = carryOut; |
| } |
| |
| #elif BB_BIG_ENDIAN |
| |
	// Big-endian can shift right in chunks larger than one byte
| // (we use the fact that 'x' is long-aligned) |
| unsigned long carryIn = (x[15] & 0x01) |
| ? ((unsigned long)0xE1 << (LONG_BIT-8)) |
| : 0; |
| # if ULONG_MAX <= 0xffffffff |
| int i; |
| for (i = 0; i < AES_BLOCK_SIZE/sizeof(long); i++) { |
| unsigned long carryOut = l[i] << (LONG_BIT-1); // zero, or 0x800..00 |
| l[i] = (l[i] >> 1) ^ carryIn; |
| carryIn = carryOut; |
| } |
| # else |
| // 64-bit code: need to process only 2 words |
| unsigned long carryOut = l[0] << (LONG_BIT-1); // zero, or 0x800..00 |
| l[0] = (l[0] >> 1) ^ carryIn; |
| l[1] = (l[1] >> 1) ^ carryOut; |
| # endif |
| |
| #else /* LITTLE_ENDIAN */ |
| |
| // In order to use word-sized ops, little-endian needs to byteswap. |
| // On x86, code size increase is ~10 bytes compared to byte-by-byte. |
| unsigned long carryIn = (x[15] & 0x01) |
| ? ((unsigned long)0xE1 << (LONG_BIT-8)) |
| : 0; |
| # if ULONG_MAX <= 0xffffffff |
| int i; |
| for (i = 0; i < AES_BLOCK_SIZE/sizeof(long); i++) { |
| unsigned long ti = SWAP_BE32(l[i]); |
| unsigned long carryOut = ti << (LONG_BIT-1); // zero, or 0x800..00 |
| ti = (ti >> 1) ^ carryIn; |
| l[i] = SWAP_BE32(ti); |
| carryIn = carryOut; |
| } |
| # else |
| // 64-bit code: need to process only 2 words |
| unsigned long tt = SWAP_BE64(l[0]); |
| unsigned long carryOut = tt << (LONG_BIT-1); // zero, or 0x800..00 |
| tt = (tt >> 1) ^ carryIn; l[0] = SWAP_BE64(tt); |
| tt = SWAP_BE64(l[1]); |
| tt = (tt >> 1) ^ carryOut; l[1] = SWAP_BE64(tt); |
| # endif |
| |
| #endif /* LITTLE_ENDIAN */ |
| #undef l |
| } |
| |
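// GMULT(X, Y): X = X * Y in GF(2^128), the GHASH "multiply by H" step.
// Walks the bits of Y from msb to lsb, accumulating the shifted X into Z for
// every set bit, then copies the product back into X. (X is clobbered by the
// shifting; the original wolfssl code kept it intact in a separate V buffer.)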
| // Caller guarantees X is aligned |
| static void GMULT(byte* X, byte* Y) |
| { |
| byte Z[AES_BLOCK_SIZE] ALIGNED_long; |
| //byte V[AES_BLOCK_SIZE] ALIGNED_long; |
| int i; |
| |
| XMEMSET(Z, 0, AES_BLOCK_SIZE); |
| //XMEMCPY(V, X, AES_BLOCK_SIZE); |
| for (i = 0; i < AES_BLOCK_SIZE; i++) { |
| uint32_t y = 0x800000 | Y[i]; |
| for (;;) { // for every bit in Y[i], from msb to lsb |
| if (y & 0x80) { |
| xorbuf_aligned_AES_BLOCK_SIZE(Z, X); // was V, not X |
| } |
| RIGHTSHIFTX(X); // was V, not X |
| y = y << 1; |
			if ((int32_t)y < 0) // bit 0x80000000 set: all 8 bits of Y[i] processed
| break; |
| } |
| } |
| XMEMCPY(X, Z, AES_BLOCK_SIZE); |
| } |
| |
| //bbox: |
// for TLS AES-GCM, a[] (the AAD) is always 13 bytes long, and bbox code provides
// 3 extra zeroed bytes after it, making it a[16], that is, a[AES_BLOCK_SIZE].
// The resulting auth tag in s[] is also always AES_BLOCK_SIZE bytes.
| // |
| // This allows some simplifications. |
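// (For TLS 1.2 the 13 bytes are seq_num[8] || type[1] || version[2] || length[2],
// per RFC 5246; the exact layout does not matter to GHASH, only the size does.)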
| #define aSz 13 |
| #define sSz AES_BLOCK_SIZE |
| void FAST_FUNC aesgcm_GHASH(byte* h, |
| const byte* a, //unsigned aSz, |
| const byte* c, unsigned cSz, |
| byte* s //, unsigned sSz |
| ) |
| { |
| byte x[AES_BLOCK_SIZE] ALIGNED_long; |
| // byte scratch[AES_BLOCK_SIZE] ALIGNED_long; |
| unsigned blocks, partial; |
| //was: byte* h = aes->H; |
| |
| //XMEMSET(x, 0, AES_BLOCK_SIZE); |
| |
| /* Hash in A, the Additional Authentication Data */ |
| // if (aSz != 0 && a != NULL) { |
| // blocks = aSz / AES_BLOCK_SIZE; |
| // partial = aSz % AES_BLOCK_SIZE; |
| // while (blocks--) { |
| //xorbuf(x, a, AES_BLOCK_SIZE); |
| XMEMCPY(x, a, AES_BLOCK_SIZE);// memcpy(x,a) = memset(x,0)+xorbuf(x,a) |
| GMULT(x, h); |
| // a += AES_BLOCK_SIZE; |
| // } |
| // if (partial != 0) { |
| // XMEMSET(scratch, 0, AES_BLOCK_SIZE); |
| // XMEMCPY(scratch, a, partial); |
| // xorbuf(x, scratch, AES_BLOCK_SIZE); |
| // GMULT(x, h); |
| // } |
| // } |
| |
| /* Hash in C, the Ciphertext */ |
| if (cSz != 0 /*&& c != NULL*/) { |
| blocks = cSz / AES_BLOCK_SIZE; |
| partial = cSz % AES_BLOCK_SIZE; |
| while (blocks--) { |
| if (BB_UNALIGNED_MEMACCESS_OK) // c is not guaranteed to be aligned |
| xorbuf_aligned_AES_BLOCK_SIZE(x, c); |
| else |
| xorbuf(x, c, AES_BLOCK_SIZE); |
| GMULT(x, h); |
| c += AES_BLOCK_SIZE; |
| } |
| if (partial != 0) { |
| //XMEMSET(scratch, 0, AES_BLOCK_SIZE); |
| //XMEMCPY(scratch, c, partial); |
| //xorbuf(x, scratch, AES_BLOCK_SIZE); |
			xorbuf(x, c, partial); // same result as above: the implicit zero padding XORs to nothing
| GMULT(x, h); |
| } |
| } |
| |
| /* Hash in the lengths of A and C in bits */ |
| //FlattenSzInBits(&scratch[0], aSz); |
| //FlattenSzInBits(&scratch[8], cSz); |
| //xorbuf_aligned_AES_BLOCK_SIZE(x, scratch); |
| // simpler: |
| #define P32(v) ((uint32_t*)v) |
| //P32(x)[0] ^= 0; |
| P32(x)[1] ^= SWAP_BE32(aSz * 8); |
| //P32(x)[2] ^= 0; |
| P32(x)[3] ^= SWAP_BE32(cSz * 8); |
| #undef P32 |
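	// Only words 1 and 3 of x were touched above: the GHASH length block is
	// len(A) || len(C) as two 64-bit big-endian bit counts, and both lengths
	// fit in 32 bits here, so the high words are zero.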
| |
| GMULT(x, h); |
| |
| /* Copy the result into s. */ |
| XMEMCPY(s, x, sSz); |
| } |