"Fossies" - the Fresh Open Source Software Archive 
Member "n2n-3.1.1/src/aes.c" (31 Mar 2022, 72993 Bytes) of package /linux/misc/n2n-3.1.1.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
For more information about "aes.c" see the
Fossies "Dox" file reference documentation and the latest
Fossies "Diffs" side-by-side code changes report:
3.0_vs_3.1.1.
1 /**
2 * (C) 2007-22 - ntop.org and contributors
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <http://www.gnu.org/licenses/>
16 *
17 */
18
19
20 #include "n2n.h"
21
22
23 #if defined (HAVE_OPENSSL_1_1) // openSSL 1.1 ---------------------------------------------------------------------
24
25
26 // get any error message out of openssl
27 // taken from https://en.wikibooks.org/wiki/OpenSSL/Error_handling
28 static char *openssl_err_as_string (void) {
29
30 BIO *bio = BIO_new (BIO_s_mem ());
31 ERR_print_errors (bio);
32 char *buf = NULL;
33 size_t len = BIO_get_mem_data (bio, &buf);
34 char *ret = (char *) calloc (1, 1 + len);
35
36 if(ret)
37 memcpy (ret, buf, len);
38
39 BIO_free (bio);
40
41 return ret;
42 }
43
44
45 int aes_cbc_encrypt (unsigned char *out, const unsigned char *in, size_t in_len,
46 const unsigned char *iv, aes_context_t *ctx) {
47
48 int evp_len;
49 int evp_ciphertext_len;
50
51 if(1 == EVP_EncryptInit_ex(ctx->enc_ctx, ctx->cipher, NULL, ctx->key, iv)) {
52 if(1 == EVP_CIPHER_CTX_set_padding(ctx->enc_ctx, 0)) {
53 if(1 == EVP_EncryptUpdate(ctx->enc_ctx, out, &evp_len, in, in_len)) {
54 evp_ciphertext_len = evp_len;
55 if(1 == EVP_EncryptFinal_ex(ctx->enc_ctx, out + evp_len, &evp_len)) {
56 evp_ciphertext_len += evp_len;
57 if(evp_ciphertext_len != in_len)
58 traceEvent(TRACE_ERROR, "aes_cbc_encrypt openssl encryption: encrypted %u bytes where %u were expected",
59 evp_ciphertext_len, in_len);
60 } else
61 traceEvent(TRACE_ERROR, "aes_cbc_encrypt openssl final encryption: %s",
62 openssl_err_as_string());
63 } else
64 traceEvent(TRACE_ERROR, "aes_cbc_encrypt openssl encrpytion: %s",
65 openssl_err_as_string());
66 } else
67 traceEvent(TRACE_ERROR, "aes_cbc_encrypt openssl padding setup: %s",
68 openssl_err_as_string());
69 } else
70 traceEvent(TRACE_ERROR, "aes_cbc_encrypt openssl init: %s",
71 openssl_err_as_string());
72
73 EVP_CIPHER_CTX_reset(ctx->enc_ctx);
74
75 return 0;
76 }
77
78
79 int aes_cbc_decrypt (unsigned char *out, const unsigned char *in, size_t in_len,
80 const unsigned char *iv, aes_context_t *ctx) {
81
82 int evp_len;
83 int evp_plaintext_len;
84
85 if(1 == EVP_DecryptInit_ex(ctx->dec_ctx, ctx->cipher, NULL, ctx->key, iv)) {
86 if(1 == EVP_CIPHER_CTX_set_padding(ctx->dec_ctx, 0)) {
87 if(1 == EVP_DecryptUpdate(ctx->dec_ctx, out, &evp_len, in, in_len)) {
88 evp_plaintext_len = evp_len;
89 if(1 == EVP_DecryptFinal_ex(ctx->dec_ctx, out + evp_len, &evp_len)) {
90 evp_plaintext_len += evp_len;
91 if(evp_plaintext_len != in_len)
92 traceEvent(TRACE_ERROR, "aes_cbc_decrypt openssl decryption: decrypted %u bytes where %u were expected",
93 evp_plaintext_len, in_len);
94 } else
95 traceEvent(TRACE_ERROR, "aes_cbc_decrypt openssl final decryption: %s",
96 openssl_err_as_string());
97 } else
98 traceEvent(TRACE_ERROR, "aes_cbc_decrypt openssl decrpytion: %s",
99 openssl_err_as_string());
100 } else
101 traceEvent(TRACE_ERROR, "aes_cbc_decrypt openssl padding setup: %s",
102 openssl_err_as_string());
103 } else
104 traceEvent(TRACE_ERROR, "aes_cbc_decrypt openssl init: %s",
105 openssl_err_as_string());
106
107 EVP_CIPHER_CTX_reset(ctx->dec_ctx);
108
109 return 0;
110 }
111
112
113 int aes_ecb_decrypt (unsigned char *out, const unsigned char *in, aes_context_t *ctx) {
114
115 AES_ecb_encrypt(in, out, &(ctx->ecb_dec_key), AES_DECRYPT);
116
117 return 0;
118 }
119
120
121 int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) {
122
123 // allocate context...
124 *ctx = (aes_context_t*) calloc(1, sizeof(aes_context_t));
125 if(!(*ctx))
126 return -1;
127
128 // ...and fill her up:
129
130 // initialize data structures
131 if(!((*ctx)->enc_ctx = EVP_CIPHER_CTX_new())) {
132 traceEvent(TRACE_ERROR, "aes_init openssl's evp_* encryption context creation failed: %s",
133 openssl_err_as_string());
134 return -1;
135 }
136
137 if(!((*ctx)->dec_ctx = EVP_CIPHER_CTX_new())) {
138 traceEvent(TRACE_ERROR, "aes_init openssl's evp_* decryption context creation failed: %s",
139 openssl_err_as_string());
140 return -1;
141 }
142
143 // check key size and make key size (given in bytes) dependant settings
144 switch(key_size) {
145 case AES128_KEY_BYTES: // 128 bit key size
146 (*ctx)->cipher = EVP_aes_128_cbc();
147 break;
148 case AES192_KEY_BYTES: // 192 bit key size
149 (*ctx)->cipher = EVP_aes_192_cbc();
150 break;
151 case AES256_KEY_BYTES: // 256 bit key size
152 (*ctx)->cipher = EVP_aes_256_cbc();
153 break;
154 default:
155 traceEvent(TRACE_ERROR, "aes_init invalid key size %u\n", key_size);
156 return -1;
157 }
158
159 // key materiel handling
160 memcpy((*ctx)->key, key, key_size);
161 AES_set_decrypt_key(key, key_size * 8, &((*ctx)->ecb_dec_key));
162
163 return 0;
164 }
165
166
167 #elif defined (__AES__) && defined (__SSE2__) // Intel's AES-NI ---------------------------------------------------
168
169
170 // inspired by https://gist.github.com/acapola/d5b940da024080dfaf5f
171 // furthered by the help of Sebastian Ramacher's implementation found at
172 // https://chromium.googlesource.com/external/github.com/dlitz/pycrypto/+/junk/master/src/AESNI.c
173 // modified along Intel's white paper on AES Instruction Set
174 // https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf
175
176
// One step of the round key expansion.
//
//   key       previous round key
//   keygened  output of _mm_aeskeygenassist_si128() for this step
//   shuf      which 32-bit word of keygened gets broadcast to all four words:
//             0x55 (word 1), 0xaa (word 2) or 0xff (word 3); any other value
//             leaves keygened as it is
//
// Returns the xor of the folded key and the (possibly broadcast) keygened.
static __m128i aes128_keyexpand(__m128i key, __m128i keygened, uint8_t shuf) {

    int pass;

    // three shift-by-one-word-and-xor passes: afterwards every 32-bit word holds
    // the xor of itself and all lower words of the original key
    for(pass = 0; pass < 3; pass++)
        key = _mm_xor_si128(key, _mm_slli_si128(key, 4));

    // the shuffle selector has to be an immediate, hence one explicit call per value
    // REVISIT: either turn this function (and perhaps the following one) into a macro
    //          or use shuffle_epi8 (which would require SSSE3 instead of SSE2)
    if(shuf == 0x55)
        keygened = _mm_shuffle_epi32(keygened, 0x55);
    else if(shuf == 0xaa)
        keygened = _mm_shuffle_epi32(keygened, 0xaa);
    else if(shuf == 0xff)
        keygened = _mm_shuffle_epi32(keygened, 0xff);

    return _mm_xor_si128(key, keygened);
}
202
203
// Second helper of the 192-bit key expansion: broadcasts the highest 32-bit word
// of 'key' to all four words and xors it with 'key2' folded once (key2 xor key2
// shifted up by one 32-bit word).
static __m128i aes192_keyexpand_2(__m128i key, __m128i key2) {

    const __m128i top_word = _mm_shuffle_epi32(key, 0xff);
    const __m128i folded   = _mm_xor_si128(key2, _mm_slli_si128(key2, 4));

    return _mm_xor_si128(top_word, folded);
}
211
212
// Shorthands for the individual key expansion steps. I is the round constant and
// must be a compile-time constant because _mm_aeskeygenassist_si128() takes an
// immediate. All arguments are parenthesized so that arbitrary expressions can be
// passed safely; note that KEYEXP128 evaluates K twice.
#define KEYEXP128(K, I)      aes128_keyexpand  ((K),  _mm_aeskeygenassist_si128((K),  (I)),  0xff)
#define KEYEXP192(K1, K2, I) aes128_keyexpand  ((K1), _mm_aeskeygenassist_si128((K2), (I)),  0x55)
#define KEYEXP192_2(K1, K2)  aes192_keyexpand_2((K1), (K2))
#define KEYEXP256(K1, K2, I) aes128_keyexpand  ((K1), _mm_aeskeygenassist_si128((K2), (I)),  0xff)
#define KEYEXP256_2(K1, K2)  aes128_keyexpand  ((K1), _mm_aeskeygenassist_si128((K2), 0x00), 0xaa)
218
219
220 // key setup
221 static int aes_internal_key_setup (aes_context_t *ctx, const uint8_t *key, int key_bits) {
222
223 // number of rounds
224 ctx->Nr = 6 + (key_bits / 32);
225
226 // encryption keys
227 switch(key_bits) {
228 case 128: {
229 ctx->rk_enc[ 0] = _mm_loadu_si128((const __m128i*)key);
230 ctx->rk_enc[ 1] = KEYEXP128(ctx->rk_enc[0], 0x01);
231 ctx->rk_enc[ 2] = KEYEXP128(ctx->rk_enc[1], 0x02);
232 ctx->rk_enc[ 3] = KEYEXP128(ctx->rk_enc[2], 0x04);
233 ctx->rk_enc[ 4] = KEYEXP128(ctx->rk_enc[3], 0x08);
234 ctx->rk_enc[ 5] = KEYEXP128(ctx->rk_enc[4], 0x10);
235 ctx->rk_enc[ 6] = KEYEXP128(ctx->rk_enc[5], 0x20);
236 ctx->rk_enc[ 7] = KEYEXP128(ctx->rk_enc[6], 0x40);
237 ctx->rk_enc[ 8] = KEYEXP128(ctx->rk_enc[7], 0x80);
238 ctx->rk_enc[ 9] = KEYEXP128(ctx->rk_enc[8], 0x1B);
239 ctx->rk_enc[10] = KEYEXP128(ctx->rk_enc[9], 0x36);
240 break;
241 }
242 case 192: {
243 __m128i temp[2];
244 ctx->rk_enc[ 0] = _mm_loadu_si128((const __m128i*) key);
245
246 ctx->rk_enc[ 1] = _mm_loadu_si128((const __m128i*) (key+16));
247 temp[0] = KEYEXP192(ctx->rk_enc[0], ctx->rk_enc[1], 0x01);
248 temp[1] = KEYEXP192_2(temp[0], ctx->rk_enc[1]);
249 ctx->rk_enc[ 1] = (__m128i)_mm_shuffle_pd((__m128d)ctx->rk_enc[1], (__m128d)temp[0], 0);
250
251 ctx->rk_enc[ 2] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1);
252 ctx->rk_enc[ 3] = KEYEXP192(temp[0], temp[1], 0x02);
253
254 ctx->rk_enc[ 4] = KEYEXP192_2(ctx->rk_enc[3], temp[1]);
255 temp[0] = KEYEXP192(ctx->rk_enc[3], ctx->rk_enc[4], 0x04);
256 temp[1] = KEYEXP192_2(temp[0], ctx->rk_enc[4]);
257 ctx->rk_enc[ 4] = (__m128i)_mm_shuffle_pd((__m128d)ctx->rk_enc[4], (__m128d)temp[0], 0);
258
259 ctx->rk_enc[ 5] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1);
260 ctx->rk_enc[ 6] = KEYEXP192(temp[0], temp[1], 0x08);
261
262 ctx->rk_enc[ 7] = KEYEXP192_2(ctx->rk_enc[6], temp[1]);
263 temp[0] = KEYEXP192(ctx->rk_enc[6], ctx->rk_enc[7], 0x10);
264 temp[1] = KEYEXP192_2(temp[0], ctx->rk_enc[7]);
265 ctx->rk_enc[ 7] = (__m128i)_mm_shuffle_pd((__m128d)ctx->rk_enc[7], (__m128d)temp[0], 0);
266
267 ctx->rk_enc[ 8] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1);
268 ctx->rk_enc[ 9] = KEYEXP192(temp[0], temp[1], 0x20);
269
270 ctx->rk_enc[10] = KEYEXP192_2(ctx->rk_enc[9], temp[1]);
271 temp[0] = KEYEXP192(ctx->rk_enc[9], ctx->rk_enc[10], 0x40);
272 temp[1] = KEYEXP192_2(temp[0], ctx->rk_enc[10]);
273 ctx->rk_enc[10] = (__m128i)_mm_shuffle_pd((__m128d)ctx->rk_enc[10], (__m128d) temp[0], 0);
274
275 ctx->rk_enc[11] = (__m128i)_mm_shuffle_pd((__m128d)temp[0],(__m128d) temp[1], 1);
276 ctx->rk_enc[12] = KEYEXP192(temp[0], temp[1], 0x80);
277 break;
278 }
279 case 256: {
280 ctx->rk_enc[ 0] = _mm_loadu_si128((const __m128i*) key);
281 ctx->rk_enc[ 1] = _mm_loadu_si128((const __m128i*) (key+16));
282 ctx->rk_enc[ 2] = KEYEXP256(ctx->rk_enc[0], ctx->rk_enc[1], 0x01);
283 ctx->rk_enc[ 3] = KEYEXP256_2(ctx->rk_enc[1], ctx->rk_enc[2]);
284 ctx->rk_enc[ 4] = KEYEXP256(ctx->rk_enc[2], ctx->rk_enc[3], 0x02);
285 ctx->rk_enc[ 5] = KEYEXP256_2(ctx->rk_enc[3], ctx->rk_enc[4]);
286 ctx->rk_enc[ 6] = KEYEXP256(ctx->rk_enc[4], ctx->rk_enc[5], 0x04);
287 ctx->rk_enc[ 7] = KEYEXP256_2(ctx->rk_enc[5], ctx->rk_enc[6]);
288 ctx->rk_enc[ 8] = KEYEXP256(ctx->rk_enc[6], ctx->rk_enc[7], 0x08);
289 ctx->rk_enc[ 9] = KEYEXP256_2(ctx->rk_enc[7], ctx->rk_enc[8]);
290 ctx->rk_enc[10] = KEYEXP256(ctx->rk_enc[8], ctx->rk_enc[9], 0x10);
291 ctx->rk_enc[11] = KEYEXP256_2(ctx->rk_enc[9], ctx->rk_enc[10]);
292 ctx->rk_enc[12] = KEYEXP256(ctx->rk_enc[10], ctx->rk_enc[11], 0x20);
293 ctx->rk_enc[13] = KEYEXP256_2(ctx->rk_enc[11], ctx->rk_enc[12]);
294 ctx->rk_enc[14] = KEYEXP256(ctx->rk_enc[12], ctx->rk_enc[13], 0x40);
295 break;
296 }
297 }
298
299 // derive decryption keys
300 for(int i = 1; i < ctx->Nr; ++i) {
301 ctx->rk_dec[ctx->Nr - i] = _mm_aesimc_si128(ctx->rk_enc[i]);
302 }
303 ctx->rk_dec[ 0] = ctx->rk_enc[ctx->Nr];
304
305 return ctx->Nr;
306 }
307
308
309 static void aes_internal_encrypt (aes_context_t *ctx, const uint8_t pt[16], uint8_t ct[16]) {
310
311 __m128i tmp = _mm_loadu_si128((__m128i*)pt);
312
313 tmp = _mm_xor_si128 (tmp, ctx->rk_enc[ 0]);
314 tmp = _mm_aesenc_si128 (tmp, ctx->rk_enc[ 1]);
315 tmp = _mm_aesenc_si128 (tmp, ctx->rk_enc[ 2]);
316 tmp = _mm_aesenc_si128 (tmp, ctx->rk_enc[ 3]);
317 tmp = _mm_aesenc_si128 (tmp, ctx->rk_enc[ 4]);
318 tmp = _mm_aesenc_si128 (tmp, ctx->rk_enc[ 5]);
319 tmp = _mm_aesenc_si128 (tmp, ctx->rk_enc[ 6]);
320 tmp = _mm_aesenc_si128 (tmp, ctx->rk_enc[ 7]);
321 tmp = _mm_aesenc_si128 (tmp, ctx->rk_enc[ 8]);
322 tmp = _mm_aesenc_si128 (tmp, ctx->rk_enc[ 9]);
323 if(ctx->Nr > 10) {
324 tmp = _mm_aesenc_si128 (tmp, ctx->rk_enc[10]);
325 tmp = _mm_aesenc_si128 (tmp, ctx->rk_enc[11]);
326 if(ctx->Nr > 12) {
327 tmp = _mm_aesenc_si128(tmp, ctx->rk_enc[12]);
328 tmp = _mm_aesenc_si128(tmp, ctx->rk_enc[13]);
329 }
330 }
331 tmp = _mm_aesenclast_si128 (tmp, ctx->rk_enc[ctx->Nr]);
332
333 _mm_storeu_si128((__m128i*) ct, tmp);
334 }
335
336
337 static void aes_internal_decrypt (aes_context_t *ctx, const uint8_t ct[16], uint8_t pt[16]) {
338
339 __m128i tmp = _mm_loadu_si128((__m128i*)ct);
340
341 tmp = _mm_xor_si128 (tmp, ctx->rk_dec[ 0]);
342 tmp = _mm_aesdec_si128 (tmp, ctx->rk_dec[ 1]);
343 tmp = _mm_aesdec_si128 (tmp, ctx->rk_dec[ 2]);
344 tmp = _mm_aesdec_si128 (tmp, ctx->rk_dec[ 3]);
345 tmp = _mm_aesdec_si128 (tmp, ctx->rk_dec[ 4]);
346 tmp = _mm_aesdec_si128 (tmp, ctx->rk_dec[ 5]);
347 tmp = _mm_aesdec_si128 (tmp, ctx->rk_dec[ 6]);
348 tmp = _mm_aesdec_si128 (tmp, ctx->rk_dec[ 7]);
349 tmp = _mm_aesdec_si128 (tmp, ctx->rk_dec[ 8]);
350 tmp = _mm_aesdec_si128 (tmp, ctx->rk_dec[ 9]);
351 if(ctx->Nr > 10) {
352 tmp = _mm_aesdec_si128 (tmp, ctx->rk_dec[10]);
353 tmp = _mm_aesdec_si128 (tmp, ctx->rk_dec[11]);
354 if(ctx->Nr > 12) {
355 tmp = _mm_aesdec_si128(tmp, ctx->rk_dec[12]);
356 tmp = _mm_aesdec_si128(tmp, ctx->rk_dec[13]);
357 }
358 }
359 tmp = _mm_aesdeclast_si128 (tmp, ctx->rk_enc[ 0]);
360
361 _mm_storeu_si128((__m128i*) pt, tmp);
362 }
363
364
365 // public API
366
367
368 int aes_ecb_decrypt (unsigned char *out, const unsigned char *in, aes_context_t *ctx) {
369
370 aes_internal_decrypt(ctx, in, out);
371
372 return AES_BLOCK_SIZE;
373 }
374
375
376 // not used
377 int aes_ecb_encrypt (unsigned char *out, const unsigned char *in, aes_context_t *ctx) {
378
379 aes_internal_encrypt(ctx, in, out);
380
381 return AES_BLOCK_SIZE;
382 }
383
384
385 int aes_cbc_encrypt (unsigned char *out, const unsigned char *in, size_t in_len,
386 const unsigned char *iv, aes_context_t *ctx) {
387
388 int n; /* number of blocks */
389 int ret = (int)in_len & 15; /* remainder */
390
391 __m128i ivec = _mm_loadu_si128((__m128i*)iv);
392
393 for(n = in_len / 16; n != 0; n--) {
394 __m128i tmp = _mm_loadu_si128((__m128i*)in);
395 in += 16;
396 tmp = _mm_xor_si128(tmp, ivec);
397
398 tmp = _mm_xor_si128 (tmp, ctx->rk_enc[ 0]);
399 tmp = _mm_aesenc_si128 (tmp, ctx->rk_enc[ 1]);
400 tmp = _mm_aesenc_si128 (tmp, ctx->rk_enc[ 2]);
401 tmp = _mm_aesenc_si128 (tmp, ctx->rk_enc[ 3]);
402 tmp = _mm_aesenc_si128 (tmp, ctx->rk_enc[ 4]);
403 tmp = _mm_aesenc_si128 (tmp, ctx->rk_enc[ 5]);
404 tmp = _mm_aesenc_si128 (tmp, ctx->rk_enc[ 6]);
405 tmp = _mm_aesenc_si128 (tmp, ctx->rk_enc[ 7]);
406 tmp = _mm_aesenc_si128 (tmp, ctx->rk_enc[ 8]);
407 tmp = _mm_aesenc_si128 (tmp, ctx->rk_enc[ 9]);
408 if(ctx->Nr > 10) {
409 tmp = _mm_aesenc_si128 (tmp, ctx->rk_enc[10]);
410 tmp = _mm_aesenc_si128 (tmp, ctx->rk_enc[11]);
411 if(ctx->Nr > 12) {
412 tmp = _mm_aesenc_si128(tmp, ctx->rk_enc[12]);
413 tmp = _mm_aesenc_si128(tmp, ctx->rk_enc[13]);
414 }
415 }
416 tmp = _mm_aesenclast_si128 (tmp, ctx->rk_enc[ctx->Nr]);
417
418 ivec = tmp;
419
420 _mm_storeu_si128((__m128i*)out, tmp);
421 out += 16;
422 }
423
424 return ret;
425 }
426
427
428 int aes_cbc_decrypt (unsigned char *out, const unsigned char *in, size_t in_len,
429 const unsigned char *iv, aes_context_t *ctx) {
430
431 int n; /* number of blocks */
432 int ret = (int)in_len & 15; /* remainder */
433
434 __m128i ivec = _mm_loadu_si128((__m128i*)iv);
435
436 // 4 parallel rails of AES decryption to reduce data dependencies in x86's deep pipelines
437 for(n = in_len / 16; n > 3; n -=4) {
438 __m128i tmp1 = _mm_loadu_si128((__m128i*)in); in += 16;
439 __m128i tmp2 = _mm_loadu_si128((__m128i*)in); in += 16;
440 __m128i tmp3 = _mm_loadu_si128((__m128i*)in); in += 16;
441 __m128i tmp4 = _mm_loadu_si128((__m128i*)in); in += 16;
442
443 __m128i old_in1 = tmp1;
444 __m128i old_in2 = tmp2;
445 __m128i old_in3 = tmp3;
446 __m128i old_in4 = tmp4;
447
448 tmp1 = _mm_xor_si128 (tmp1, ctx->rk_dec[ 0]); tmp2 = _mm_xor_si128 (tmp2, ctx->rk_dec[ 0]);
449 tmp3 = _mm_xor_si128 (tmp3, ctx->rk_dec[ 0]); tmp4 = _mm_xor_si128 (tmp4, ctx->rk_dec[ 0]);
450
451 tmp1 = _mm_aesdec_si128 (tmp1, ctx->rk_dec[ 1]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[ 1]);
452 tmp3 = _mm_aesdec_si128 (tmp3, ctx->rk_dec[ 1]); tmp4 = _mm_aesdec_si128 (tmp4, ctx->rk_dec[ 1]);
453
454 tmp1 = _mm_aesdec_si128 (tmp1, ctx->rk_dec[ 2]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[ 2]);
455 tmp3 = _mm_aesdec_si128 (tmp3, ctx->rk_dec[ 2]); tmp4 = _mm_aesdec_si128 (tmp4, ctx->rk_dec[ 2]);
456
457 tmp1 = _mm_aesdec_si128 (tmp1, ctx->rk_dec[ 3]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[ 3]);
458 tmp3 = _mm_aesdec_si128 (tmp3, ctx->rk_dec[ 3]); tmp4 = _mm_aesdec_si128 (tmp4, ctx->rk_dec[ 3]);
459
460 tmp1 = _mm_aesdec_si128 (tmp1, ctx->rk_dec[ 4]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[ 4]);
461 tmp3 = _mm_aesdec_si128 (tmp3, ctx->rk_dec[ 4]); tmp4 = _mm_aesdec_si128 (tmp4, ctx->rk_dec[ 4]);
462
463 tmp1 = _mm_aesdec_si128 (tmp1, ctx->rk_dec[ 5]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[ 5]);
464 tmp3 = _mm_aesdec_si128 (tmp3, ctx->rk_dec[ 5]); tmp4 = _mm_aesdec_si128 (tmp4, ctx->rk_dec[ 5]);
465
466 tmp1 = _mm_aesdec_si128 (tmp1, ctx->rk_dec[ 6]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[ 6]);
467 tmp3 = _mm_aesdec_si128 (tmp3, ctx->rk_dec[ 6]); tmp4 = _mm_aesdec_si128 (tmp4, ctx->rk_dec[ 6]);
468
469 tmp1 = _mm_aesdec_si128 (tmp1, ctx->rk_dec[ 7]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[ 7]);
470 tmp3 = _mm_aesdec_si128 (tmp3, ctx->rk_dec[ 7]); tmp4 = _mm_aesdec_si128 (tmp4, ctx->rk_dec[ 7]);
471
472 tmp1 = _mm_aesdec_si128 (tmp1, ctx->rk_dec[ 8]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[ 8]);
473 tmp3 = _mm_aesdec_si128 (tmp3, ctx->rk_dec[ 8]); tmp4 = _mm_aesdec_si128 (tmp4, ctx->rk_dec[ 8]);
474
475 tmp1 = _mm_aesdec_si128 (tmp1, ctx->rk_dec[ 9]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[ 9]);
476 tmp3 = _mm_aesdec_si128 (tmp3, ctx->rk_dec[ 9]); tmp4 = _mm_aesdec_si128 (tmp4, ctx->rk_dec[ 9]);
477
478 if(ctx->Nr > 10) {
479 tmp1 = _mm_aesdec_si128 (tmp1, ctx->rk_dec[10]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[10]);
480 tmp3 = _mm_aesdec_si128 (tmp3, ctx->rk_dec[10]); tmp4 = _mm_aesdec_si128 (tmp4, ctx->rk_dec[10]);
481
482 tmp1 = _mm_aesdec_si128 (tmp1, ctx->rk_dec[11]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[11]);
483 tmp3 = _mm_aesdec_si128 (tmp3, ctx->rk_dec[11]); tmp4 = _mm_aesdec_si128 (tmp4, ctx->rk_dec[11]);
484
485 if(ctx->Nr > 12) {
486 tmp1 = _mm_aesdec_si128(tmp1, ctx->rk_dec[12]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[12]);
487 tmp3 = _mm_aesdec_si128(tmp3, ctx->rk_dec[12]); tmp4 = _mm_aesdec_si128 (tmp4, ctx->rk_dec[12]);
488
489 tmp1 = _mm_aesdec_si128(tmp1, ctx->rk_dec[13]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[13]);
490 tmp3 = _mm_aesdec_si128(tmp3, ctx->rk_dec[13]); tmp4 = _mm_aesdec_si128 (tmp4, ctx->rk_dec[13]);
491 }
492 }
493 tmp1 = _mm_aesdeclast_si128(tmp1, ctx->rk_enc[ 0]); tmp2 = _mm_aesdeclast_si128(tmp2, ctx->rk_enc[ 0]);
494 tmp3 = _mm_aesdeclast_si128(tmp3, ctx->rk_enc[ 0]); tmp4 = _mm_aesdeclast_si128(tmp4, ctx->rk_enc[ 0]);
495
496 tmp1 = _mm_xor_si128 (tmp1, ivec); tmp2 = _mm_xor_si128 (tmp2, old_in1);
497 tmp3 = _mm_xor_si128 (tmp3, old_in2); tmp4 = _mm_xor_si128 (tmp4, old_in3);
498
499 ivec = old_in4;
500
501 _mm_storeu_si128((__m128i*) out, tmp1); out += 16;
502 _mm_storeu_si128((__m128i*) out, tmp2); out += 16;
503 _mm_storeu_si128((__m128i*) out, tmp3); out += 16;
504 _mm_storeu_si128((__m128i*) out, tmp4); out += 16;
505 }
506 // now: less than 4 blocks remaining
507
508 // if 2 or 3 blocks remaining --> this code handles two of them
509 if(n > 1) {
510 n-= 2;
511
512 __m128i tmp1 = _mm_loadu_si128((__m128i*)in); in += 16;
513 __m128i tmp2 = _mm_loadu_si128((__m128i*)in); in += 16;
514
515 __m128i old_in1 = tmp1;
516 __m128i old_in2 = tmp2;
517
518 tmp1 = _mm_xor_si128 (tmp1, ctx->rk_dec[ 0]); tmp2 = _mm_xor_si128 (tmp2, ctx->rk_dec[ 0]);
519 tmp1 = _mm_aesdec_si128 (tmp1, ctx->rk_dec[ 1]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[ 1]);
520 tmp1 = _mm_aesdec_si128 (tmp1, ctx->rk_dec[ 2]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[ 2]);
521 tmp1 = _mm_aesdec_si128 (tmp1, ctx->rk_dec[ 3]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[ 3]);
522 tmp1 = _mm_aesdec_si128 (tmp1, ctx->rk_dec[ 4]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[ 4]);
523 tmp1 = _mm_aesdec_si128 (tmp1, ctx->rk_dec[ 5]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[ 5]);
524 tmp1 = _mm_aesdec_si128 (tmp1, ctx->rk_dec[ 6]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[ 6]);
525 tmp1 = _mm_aesdec_si128 (tmp1, ctx->rk_dec[ 7]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[ 7]);
526 tmp1 = _mm_aesdec_si128 (tmp1, ctx->rk_dec[ 8]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[ 8]);
527 tmp1 = _mm_aesdec_si128 (tmp1, ctx->rk_dec[ 9]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[ 9]);
528 if(ctx->Nr > 10) {
529 tmp1 = _mm_aesdec_si128 (tmp1, ctx->rk_dec[10]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[10]);
530 tmp1 = _mm_aesdec_si128 (tmp1, ctx->rk_dec[11]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[11]);
531 if(ctx->Nr > 12) {
532 tmp1 = _mm_aesdec_si128(tmp1, ctx->rk_dec[12]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[12]);
533 tmp1 = _mm_aesdec_si128(tmp1, ctx->rk_dec[13]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[13]);
534 }
535 }
536 tmp1 = _mm_aesdeclast_si128 (tmp1, ctx->rk_enc[ 0]); tmp2 = _mm_aesdeclast_si128(tmp2, ctx->rk_enc[ 0]);
537
538 tmp1 = _mm_xor_si128 (tmp1, ivec); tmp2 = _mm_xor_si128 (tmp2, old_in1);
539
540 ivec = old_in2;
541
542 _mm_storeu_si128((__m128i*) out, tmp1); out += 16;
543 _mm_storeu_si128((__m128i*) out, tmp2); out += 16;
544 }
545
546 // one block remaining
547 if(n) {
548 __m128i tmp = _mm_loadu_si128((__m128i*)in);
549
550 tmp = _mm_xor_si128 (tmp, ctx->rk_dec[ 0]);
551 tmp = _mm_aesdec_si128 (tmp, ctx->rk_dec[ 1]);
552 tmp = _mm_aesdec_si128 (tmp, ctx->rk_dec[ 2]);
553 tmp = _mm_aesdec_si128 (tmp, ctx->rk_dec[ 3]);
554 tmp = _mm_aesdec_si128 (tmp, ctx->rk_dec[ 4]);
555 tmp = _mm_aesdec_si128 (tmp, ctx->rk_dec[ 5]);
556 tmp = _mm_aesdec_si128 (tmp, ctx->rk_dec[ 6]);
557 tmp = _mm_aesdec_si128 (tmp, ctx->rk_dec[ 7]);
558 tmp = _mm_aesdec_si128 (tmp, ctx->rk_dec[ 8]);
559 tmp = _mm_aesdec_si128 (tmp, ctx->rk_dec[ 9]);
560 if(ctx->Nr > 10) {
561 tmp = _mm_aesdec_si128 (tmp, ctx->rk_dec[10]);
562 tmp = _mm_aesdec_si128 (tmp, ctx->rk_dec[11]);
563 if(ctx->Nr > 12) {
564 tmp = _mm_aesdec_si128(tmp, ctx->rk_dec[12]);
565 tmp = _mm_aesdec_si128(tmp, ctx->rk_dec[13]);
566 }
567 }
568 tmp = _mm_aesdeclast_si128 (tmp, ctx->rk_enc[ 0]);
569
570 tmp = _mm_xor_si128 (tmp, ivec);
571
572 _mm_storeu_si128((__m128i*) out, tmp);
573 }
574
575 return ret;
576 }
577
578
579 int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) {
580
581 // allocate context...
582 *ctx = (aes_context_t*) calloc(1, sizeof(aes_context_t));
583 if(!(*ctx))
584 return -1;
585 // ...and fill her up:
586
587 // initialize data structures
588
589 // check key size and make key size (given in bytes) dependant settings
590 switch(key_size) {
591 case AES128_KEY_BYTES: // 128 bit key size
592 break;
593 case AES192_KEY_BYTES: // 192 bit key size
594 break;
595 case AES256_KEY_BYTES: // 256 bit key size
596 break;
597 default:
598 traceEvent(TRACE_ERROR, "aes_init invalid key size %u\n", key_size);
599 return -1;
600 }
601
602 // key materiel handling
603 aes_internal_key_setup ( *ctx, key, 8 * key_size);
604
605 return 0;
606 }
607
608
609 #else // plain C --------------------------------------------------------------------------
610
611
612 // rijndael-alg-fst.c version 3.0 (December 2000)
613 // optimised ANSI C code for the Rijndael cipher (now AES)
614 // original authors: Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
615 // Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
616 // Paulo Barreto <paulo.barreto@terra.com.br>
617 //
618 // was put in the public domain, taken (and modified) from
619 // https://fastcrypto.org/front/misc/rijndael-alg-fst.c
620
621
622 // Te0[x] = S [x].[02, 01, 01, 03];
// Encryption lookup table with 256 entries, indexed by a byte value. As the
// first entry shows (0xc66363a5 = bytes c6, 63, 63, a5), each 32-bit word packs
// the substituted byte multiplied by 02, 01, 01 and 03 in GF(2^8), most
// significant byte first.
623 static const uint32_t Te0[256] = {
624 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU, 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
625 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU, 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
626 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U, 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
627 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU, 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
628 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU, 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
629 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U, 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
630 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU, 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
631 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU, 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
632 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU, 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
633 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU, 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
634 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU, 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
635 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU, 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
636 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U, 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
637 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U, 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
638 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU, 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
639 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U, 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
640 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU, 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
641 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U, 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
642 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU, 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
643 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU, 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
644 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU, 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
645 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U, 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
646 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U, 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
647 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U, 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
648 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U, 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
649 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U, 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
650 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU, 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
651 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U, 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
652 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U, 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
653 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U, 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
654 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U, 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
655 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U, 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU };
656
657 // Te1[x] = S [x].[03, 02, 01, 01];
// Encryption lookup table with 256 entries, indexed by a byte value. As the
// first entry shows (0xa5c66363 = bytes a5, c6, 63, 63), each 32-bit word packs
// the substituted byte multiplied by 03, 02, 01 and 01 in GF(2^8), most
// significant byte first.
658 static const uint32_t Te1[256] = {
659 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU, 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
660 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU, 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
661 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU, 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
662 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU, 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
663 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U, 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
664 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U, 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
665 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U, 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
666 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U, 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
667 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU, 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
668 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U, 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
669 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU, 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
670 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U, 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
671 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU, 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
672 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU, 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
673 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU, 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
674 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U, 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
675 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU, 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
676 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU, 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
677 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU, 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
678 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U, 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
679 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU, 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
680 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U, 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
681 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU, 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
682 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU, 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
683 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU, 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
684 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU, 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
685 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U, 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
686 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U, 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
687 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U, 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
688 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U, 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
689 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU, 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
690 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU, 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U };
691
692 // Te2[x] = S [x].[01, 03, 02, 01];
693 static const uint32_t Te2[256] = {
694 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU, 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
695 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU, 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
696 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU, 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
697 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU, 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
698 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U, 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
699 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U, 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
700 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U, 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
701 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U, 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
702 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU, 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
703 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U, 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
704 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU, 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
705 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U, 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
706 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU, 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
707 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU, 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
708 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU, 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
709 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U, 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
710 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU, 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
711 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU, 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
712 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU, 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
713 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U, 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
714 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU, 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
715 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U, 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
716 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU, 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
717 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU, 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
718 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU, 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
719 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU, 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
720 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U, 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
721 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U, 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
722 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U, 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
723 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U, 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
724 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU, 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
725 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU, 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U };
726
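// Te3[x] = S[x].[01, 01, 03, 02];
// i.e. the S-box output S[x] multiplied in GF(2^8) by 01, 01, 03 and 02,
// packed into one 32-bit word from the most to the least significant byte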
728 static const uint32_t Te3[256] = {
729 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U, 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
730 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U, 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
731 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU, 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
732 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U, 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
733 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU, 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
734 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U, 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
735 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU, 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
736 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU, 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
737 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U, 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
738 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU, 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
739 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U, 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
740 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U, 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
741 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU, 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
742 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU, 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
743 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U, 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
744 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U, 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
745 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U, 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
746 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU, 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
747 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U, 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
748 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U, 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
749 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U, 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
750 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U, 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
751 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU, 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
752 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU, 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
753 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU, 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
754 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU, 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
755 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU, 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
756 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U, 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
757 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U, 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
758 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U, 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
759 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU, 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
760 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU, 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU };
761
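// Te4[x] = S[x].[01, 01, 01, 01];
// i.e. the plain S-box output S[x] replicated into all four bytes of the word,
// so that a single byte mask selects S[x] at the wanted byte position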
763 static const uint32_t Te4[256] = {
764 0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU, 0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U,
765 0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU, 0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U,
766 0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU, 0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U,
767 0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU, 0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U,
768 0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U, 0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU,
769 0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U, 0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U,
770 0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U, 0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU,
771 0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U, 0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U,
772 0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU, 0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U,
773 0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U, 0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U,
774 0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU, 0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU,
775 0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U, 0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU,
776 0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU, 0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U,
777 0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU, 0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U,
778 0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU, 0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U,
779 0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U, 0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U,
780 0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU, 0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U,
781 0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU, 0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U,
782 0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU, 0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U,
783 0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U, 0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU,
784 0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU, 0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU,
785 0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U, 0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U,
786 0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU, 0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U,
787 0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU, 0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U,
788 0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU, 0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U,
789 0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU, 0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU,
790 0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U, 0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU,
791 0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U, 0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU,
792 0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U, 0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U,
793 0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U, 0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU,
794 0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU, 0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U,
795 0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU, 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U };
796
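// Td0[x] = Si[x].[0e, 09, 0d, 0b];
// i.e. the inverse S-box output Si[x] multiplied in GF(2^8) by 0e, 09, 0d and 0b,
// packed into one 32-bit word from the most to the least significant byte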
798 static const uint32_t Td0[256] = {
799 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U, 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
800 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U, 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
801 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U, 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
802 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU, 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
803 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU, 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
804 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U, 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
805 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U, 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
806 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U, 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
807 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U, 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
808 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U, 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
809 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U, 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
810 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U, 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
811 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U, 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
812 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U, 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
813 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU, 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
814 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU, 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
815 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU, 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
816 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U, 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
817 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU, 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
818 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U, 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
819 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U, 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
820 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U, 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
821 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U, 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
822 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U, 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
823 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U, 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
824 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U, 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
825 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU, 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
826 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU, 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
827 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U, 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
828 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU, 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
829 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU, 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
830 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U, 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U };
831
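// Td1[x] = Si[x].[0b, 0e, 09, 0d];
// i.e. the inverse S-box output Si[x] multiplied in GF(2^8) by 0b, 0e, 09 and 0d,
// packed into one 32-bit word from the most to the least significant byte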
833 static const uint32_t Td1[256] = {
834 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU, 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
835 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU, 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
836 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U, 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
837 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U, 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
838 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U, 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
839 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU, 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
840 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U, 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
841 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U, 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
842 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U, 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
843 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU, 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
844 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU, 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
845 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU, 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
846 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U, 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
847 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U, 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
848 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U, 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
849 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U, 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
850 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U, 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
851 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U, 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
852 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U, 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
853 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U, 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
854 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU, 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
855 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U, 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
856 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U, 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
857 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU, 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
858 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU, 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
859 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U, 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
860 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U, 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
861 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U, 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
862 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU, 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
863 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U, 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
864 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U, 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
865 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U, 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U };
866
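// Td2[x] = Si[x].[0d, 0b, 0e, 09];
// i.e. the inverse S-box output Si[x] multiplied in GF(2^8) by 0d, 0b, 0e and 09,
// packed into one 32-bit word from the most to the least significant byte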
868 static const uint32_t Td2[256] = {
869 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U, 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
870 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U, 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
871 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU, 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
872 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U, 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
873 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U, 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
874 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U, 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
875 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU, 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
876 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U, 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
877 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U, 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
878 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U, 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
879 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U, 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
880 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U, 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
881 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U, 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
882 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU, 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
883 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU, 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
884 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU, 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
885 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU, 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
886 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U, 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
887 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U, 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
888 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U, 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
889 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U, 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
890 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU, 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
891 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U, 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
892 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU, 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
893 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U, 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
894 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U, 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
895 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U, 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
896 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU, 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
897 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U, 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
898 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U, 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
899 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U, 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
900 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U, 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U };
901
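// Td3[x] = Si[x].[09, 0d, 0b, 0e];
// i.e. the inverse S-box output Si[x] multiplied in GF(2^8) by 09, 0d, 0b and 0e,
// packed into one 32-bit word from the most to the least significant byte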
903 static const uint32_t Td3[256] = {
904 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU, 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
905 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U, 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
906 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU, 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
907 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U, 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
908 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U, 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
909 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U, 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
910 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U, 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
911 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U, 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
912 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU, 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
913 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U, 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
914 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU, 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
915 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U, 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
916 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U, 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
917 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U, 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
918 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU, 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
919 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U, 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
920 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U, 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
921 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U, 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
922 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U, 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
923 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U, 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
924 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U, 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
925 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U, 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
926 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU, 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
927 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U, 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
928 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U, 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
929 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U, 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
930 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U, 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
931 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU, 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
932 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU, 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
933 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U, 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
934 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U, 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
935 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U, 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U };
936
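// Td4[x] = Si[x].[01, 01, 01, 01];
// i.e. the plain inverse S-box output Si[x] replicated into all four bytes of the word,
// so that a single byte mask selects Si[x] at the wanted byte position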
938 static const uint32_t Td4[256] = {
939 0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U, 0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U,
940 0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU, 0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU,
941 0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U, 0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U,
942 0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U, 0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU,
943 0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U, 0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU,
944 0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU, 0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU,
945 0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U, 0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U,
946 0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U, 0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U,
947 0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U, 0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U,
948 0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU, 0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U,
949 0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U, 0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU,
950 0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U, 0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U,
951 0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U, 0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU,
952 0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U, 0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U,
953 0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU, 0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U,
954 0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U, 0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU,
955 0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U, 0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU,
956 0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU, 0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U,
957 0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U, 0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U,
958 0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U, 0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU,
959 0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U, 0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U,
960 0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU, 0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU,
961 0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU, 0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U,
962 0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU, 0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U,
963 0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U, 0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U,
964 0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U, 0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU,
965 0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U, 0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU,
966 0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU, 0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU,
967 0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU, 0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U,
968 0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU, 0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U,
969 0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU, 0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U,
970 0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U, 0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU };
971
// round constants of the key expansion, each one held in the most significant byte
// of its word; for 128-bit blocks, Rijndael never uses more than these 10 values
static const uint32_t rcon[] = {
    0x01U << 24,
    0x02U << 24,
    0x04U << 24,
    0x08U << 24,
    0x10U << 24,
    0x20U << 24,
    0x40U << 24,
    0x80U << 24,
    0x1BU << 24,
    0x36U << 24 };
977
978
// load a 32-bit word stored in big-endian byte order at p
//
// the word is assembled byte by byte, so p needs no particular alignment and the
// buffer is never accessed through an incompatible pointer type
static inline uint32_t aes_getu32 (const uint8_t *p) {

    return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) | ((uint32_t)p[2] << 8) | (uint32_t)p[3];
}


// store the 32-bit word v in big-endian byte order at p (any alignment)
static inline void aes_putu32 (uint8_t *p, uint32_t v) {

    p[0] = (uint8_t)(v >> 24);
    p[1] = (uint8_t)(v >> 16);
    p[2] = (uint8_t)(v >>  8);
    p[3] = (uint8_t)(v      );
}


// big-endian word access to byte buffers; each argument is evaluated exactly once
#define GETU32(p)      (aes_getu32((const uint8_t*)(p)))
#define PUTU32(ct, st) { aes_putu32((uint8_t*)(ct), (st)); }

// b0..b3: extract byte 0 (least significant) to byte 3 (most significant) of a word
#define b0(x) ((uint8_t)(x))
#define b1(x) ((uint8_t)((x) >> 8))
#define b2(x) ((uint8_t)((x) >> 16))
#define b3(x) ((uint8_t)((x) >> 24))

// m0..m3: keep byte 0 (least significant) to byte 3 (most significant) in place, clear the rest
#define m0(x) ((x) & 0x000000ff)
#define m1(x) ((x) & 0x0000ff00)
#define m2(x) ((x) & 0x00ff0000)
#define m3(x) ((x) & 0xff000000)
991
992
993 // expand the cipher key into the encryption key schedule and
994 // return the number of rounds for the given cipher key size
995 static int aes_internal_key_setup_enc (uint32_t rk[/*4*(Nr + 1)*/], const uint8_t cipherKey[], int keyBits) {
996
997 int i = 0;
998 uint32_t temp;
999
1000 rk[0] = GETU32(cipherKey );
1001 rk[1] = GETU32(cipherKey + 4);
1002 rk[2] = GETU32(cipherKey + 8);
1003 rk[3] = GETU32(cipherKey + 12);
1004 if(keyBits == 128) {
1005 for(;;) {
1006 temp = rk[3];
1007 rk[4] = rk[0] ^
1008 (Te4[b2(temp)] & 0xff000000) ^
1009 (Te4[b1(temp)] & 0x00ff0000) ^
1010 (Te4[b0(temp)] & 0x0000ff00) ^
1011 (Te4[b3(temp)] & 0x000000ff) ^
1012 rcon[i];
1013 rk[5] = rk[1] ^ rk[4];
1014 rk[6] = rk[2] ^ rk[5];
1015 rk[7] = rk[3] ^ rk[6];
1016 if(++i == 10) {
1017 return 10;
1018 }
1019 rk += 4;
1020 }
1021 }
1022 rk[4] = GETU32(cipherKey + 16);
1023 rk[5] = GETU32(cipherKey + 20);
1024 if(keyBits == 192) {
1025 for(;;) {
1026 temp = rk[ 5];
1027 rk[ 6] = rk[ 0] ^
1028 (Te4[b2(temp)] & 0xff000000) ^
1029 (Te4[b1(temp)] & 0x00ff0000) ^
1030 (Te4[b0(temp)] & 0x0000ff00) ^
1031 (Te4[b3(temp)] & 0x000000ff) ^
1032 rcon[i];
1033 rk[ 7] = rk[ 1] ^ rk[ 6];
1034 rk[ 8] = rk[ 2] ^ rk[ 7];
1035 rk[ 9] = rk[ 3] ^ rk[ 8];
1036 if(++i == 8) {
1037 return 12;
1038 }
1039 rk[10] = rk[ 4] ^ rk[ 9];
1040 rk[11] = rk[ 5] ^ rk[10];
1041 rk += 6;
1042 }
1043 }
1044 rk[6] = GETU32(cipherKey + 24);
1045 rk[7] = GETU32(cipherKey + 28);
1046 if(keyBits == 256) {
1047 for(;;) {
1048 temp = rk[ 7];
1049 rk[ 8] = rk[ 0] ^
1050 (Te4[b2(temp)] & 0xff000000) ^
1051 (Te4[b1(temp)] & 0x00ff0000) ^
1052 (Te4[b0(temp)] & 0x0000ff00) ^
1053 (Te4[b3(temp)] & 0x000000ff) ^
1054 rcon[i];
1055 rk[ 9] = rk[ 1] ^ rk[ 8];
1056 rk[10] = rk[ 2] ^ rk[ 9];
1057 rk[11] = rk[ 3] ^ rk[10];
1058 if(++i == 7) {
1059 return 14;
1060 }
1061 temp = rk[11];
1062 rk[12] = rk[ 4] ^
1063 (Te4[b3(temp)] & 0xff000000) ^
1064 (Te4[b2(temp)] & 0x00ff0000) ^
1065 (Te4[b1(temp)] & 0x0000ff00) ^
1066 (Te4[b0(temp)] & 0x000000ff);
1067 rk[13] = rk[ 5] ^ rk[12];
1068 rk[14] = rk[ 6] ^ rk[13];
1069 rk[15] = rk[ 7] ^ rk[14];
1070 rk += 8;
1071 }
1072 }
1073
1074 return 0;
1075 }
1076
1077
1078 #define INVMIXCOLRK(n) rk[n] = Td0[b0(Te4[b3(rk[n])])] ^ Td1[b0(Te4[b2(rk[n])])] ^ Td2[b0(Te4[b1(rk[n])])] ^ Td3[b0(Te4[b0(rk[n])])]
1079
1080
// derive the decryption key schedule from the cipher key: the encryption key schedule
// is expanded into rk first, then the order of its round keys is reversed and the inverse
// MixColumn transform is applied to all round keys but the first and the last;
// returns the number of rounds for the given cipher key size
static int aes_internal_key_setup_dec (uint32_t rk[/*4*(Nr + 1)*/], const uint8_t cipherKey[], int keyBits) {

    int rounds;
    int lo, hi;
    int w, r;
    uint32_t swap;

    // start from the expanded encryption key schedule
    rounds = aes_internal_key_setup_enc(rk, cipherKey, keyBits);

    // reverse the order of the round keys (four words each), walking inwards from both ends
    lo = 0;
    hi = 4 * rounds;
    while(lo < hi) {
        for(w = 0; w < 4; w++) {
            swap       = rk[lo + w];
            rk[lo + w] = rk[hi + w];
            rk[hi + w] = swap;
        }
        lo += 4;
        hi -= 4;
    }

    // inverse MixColumn transform for every round key except the first and the last one
    for(r = 1; r < rounds; r++) {
        INVMIXCOLRK(4 * r + 0);
        INVMIXCOLRK(4 * r + 1);
        INVMIXCOLRK(4 * r + 2);
        INVMIXCOLRK(4 * r + 3);
    }

    return rounds;
}
1109
1110
// one table-driven encryption round: computes the state words DST0..DST3 from the state
// words SRC0..SRC3 through the Te0..Te3 lookup tables and XORs in the round key words
// rk[4 * round + 0..3]; DST and SRC are variable name prefixes (the digits 0..3 get pasted
// on), rk needs to be in scope at the point of use;
// the body is wrapped in do { } while(0) so that the macro behaves like one single statement
// (e.g. in an unbraced if/else), and round is parenthesized so that any integer expression
// can be passed
#define AES_ENC_ROUND(DST, SRC, round) do {                                                                       \
        DST##0 = Te0[b3(SRC##0)] ^ Te1[b2(SRC##1)] ^ Te2[b1(SRC##2)] ^ Te3[b0(SRC##3)] ^ rk[4 * (round) + 0];     \
        DST##1 = Te0[b3(SRC##1)] ^ Te1[b2(SRC##2)] ^ Te2[b1(SRC##3)] ^ Te3[b0(SRC##0)] ^ rk[4 * (round) + 1];     \
        DST##2 = Te0[b3(SRC##2)] ^ Te1[b2(SRC##3)] ^ Te2[b1(SRC##0)] ^ Te3[b0(SRC##1)] ^ rk[4 * (round) + 2];     \
        DST##3 = Te0[b3(SRC##3)] ^ Te1[b2(SRC##0)] ^ Te2[b1(SRC##1)] ^ Te3[b0(SRC##2)] ^ rk[4 * (round) + 3];     \
    } while(0)
1116
1117
1118 static void aes_internal_encrypt (const uint32_t rk[/*4*(Nr + 1)*/], int Nr, const uint8_t pt[16], uint8_t ct[16]) {
1119
1120 uint32_t s0, s1, s2, s3, t0, t1, t2, t3;
1121
1122 // map byte array block to cipher state and add initial round key
1123 s0 = GETU32(pt ) ^ rk[0];
1124 s1 = GETU32(pt + 4) ^ rk[1];
1125 s2 = GETU32(pt + 8) ^ rk[2];
1126 s3 = GETU32(pt + 12) ^ rk[3];
1127
1128 AES_ENC_ROUND(t, s, 1);
1129 AES_ENC_ROUND(s, t, 2);
1130 AES_ENC_ROUND(t, s, 3);
1131 AES_ENC_ROUND(s, t, 4);
1132 AES_ENC_ROUND(t, s, 5);
1133 AES_ENC_ROUND(s, t, 6);
1134 AES_ENC_ROUND(t, s, 7);
1135 AES_ENC_ROUND(s, t, 8);
1136 AES_ENC_ROUND(t, s, 9);
1137
1138 if(Nr > 10) {
1139 AES_ENC_ROUND(s, t, 10);
1140 AES_ENC_ROUND(t, s, 11);
1141 if(Nr > 12) {
1142 AES_ENC_ROUND(s, t, 12);
1143 AES_ENC_ROUND(t, s, 13);
1144 }
1145 }
1146
1147 rk += Nr << 2;
1148 // apply last round and map cipher state to byte array block
1149 s0 = m3(Te4[b3(t0)]) ^ m2(Te4[b2(t1)]) ^ m1(Te4[b1(t2)]) ^ m0(Te4[b0(t3)]) ^ rk[0];
1150 PUTU32(ct , s0);
1151 s1 = m3(Te4[b3(t1)]) ^ m2(Te4[b2(t2)]) ^ m1(Te4[b1(t3)]) ^ m0(Te4[b0(t0)]) ^ rk[1];
1152 PUTU32(ct + 4, s1);
1153 s2 = m3(Te4[b3(t2)]) ^ m2(Te4[b2(t3)]) ^ m1(Te4[b1(t0)]) ^ m0(Te4[b0(t1)]) ^ rk[2];
1154 PUTU32(ct + 8, s2);
1155 s3 = m3(Te4[b3(t3)]) ^ m2(Te4[b2(t0)]) ^ m1(Te4[b1(t1)]) ^ m0(Te4[b0(t2)]) ^ rk[3];
1156 PUTU32(ct + 12, s3);
1157 }
1158
1159
// one table-driven decryption round: computes the state words DST0..DST3 from the state
// words SRC0..SRC3 through the Td0..Td3 lookup tables and XORs in the round key words
// rk[4 * round + 0..3]; DST and SRC are variable name prefixes (the digits 0..3 get pasted
// on), rk needs to be in scope at the point of use;
// the body is wrapped in do { } while(0) so that the macro behaves like one single statement
// (e.g. in an unbraced if/else), and round is parenthesized so that any integer expression
// can be passed
#define AES_DEC_ROUND(DST, SRC, round) do {                                                                       \
        DST##0 = Td0[b3(SRC##0)] ^ Td1[b2(SRC##3)] ^ Td2[b1(SRC##2)] ^ Td3[b0(SRC##1)] ^ rk[4 * (round) + 0];     \
        DST##1 = Td0[b3(SRC##1)] ^ Td1[b2(SRC##0)] ^ Td2[b1(SRC##3)] ^ Td3[b0(SRC##2)] ^ rk[4 * (round) + 1];     \
        DST##2 = Td0[b3(SRC##2)] ^ Td1[b2(SRC##1)] ^ Td2[b1(SRC##0)] ^ Td3[b0(SRC##3)] ^ rk[4 * (round) + 2];     \
        DST##3 = Td0[b3(SRC##3)] ^ Td1[b2(SRC##2)] ^ Td2[b1(SRC##1)] ^ Td3[b0(SRC##0)] ^ rk[4 * (round) + 3];     \
    } while(0)
1165
1166
1167 static void aes_internal_decrypt (const uint32_t rk[/*4*(Nr + 1)*/], int Nr, const uint8_t ct[16], uint8_t pt[16]) {
1168
1169 uint32_t s0, s1, s2, s3, t0, t1, t2, t3;
1170
1171 // map byte array block to cipher state and add initial round key
1172 s0 = GETU32(ct ) ^ rk[0];
1173 s1 = GETU32(ct + 4) ^ rk[1];
1174 s2 = GETU32(ct + 8) ^ rk[2];
1175 s3 = GETU32(ct + 12) ^ rk[3];
1176
1177 AES_DEC_ROUND(t, s, 1);
1178 AES_DEC_ROUND(s, t, 2);
1179 AES_DEC_ROUND(t, s, 3);
1180 AES_DEC_ROUND(s, t, 4);
1181 AES_DEC_ROUND(t, s, 5);
1182 AES_DEC_ROUND(s, t, 6);
1183 AES_DEC_ROUND(t, s, 7);
1184 AES_DEC_ROUND(s, t, 8);
1185 AES_DEC_ROUND(t, s, 9);
1186
1187 if(Nr > 10) {
1188 AES_DEC_ROUND(s, t, 10);
1189 AES_DEC_ROUND(t, s, 11);
1190 if(Nr > 12) {
1191 AES_DEC_ROUND(s, t, 12);
1192 AES_DEC_ROUND(t, s, 13);
1193 }
1194 }
1195
1196 rk += Nr << 2;
1197 // apply last round and map cipher state to byte array block
1198 s0 = m3(Td4[b3(t0)]) ^ m2(Td4[b2(t3)]) ^ m1(Td4[b1(t2)]) ^ m0(Td4[b0(t1)]) ^ rk[0];
1199 PUTU32(pt , s0);
1200 s1 = m3(Td4[b3(t1)]) ^ m2(Td4[b2(t0)]) ^ m1(Td4[b1(t3)]) ^ m0(Td4[b0(t2)]) ^ rk[1];
1201 PUTU32(pt + 4, s1);
1202 s2 = m3(Td4[b3(t2)]) ^ m2(Td4[b2(t1)]) ^ m1(Td4[b1(t0)]) ^ m0(Td4[b0(t3)]) ^ rk[2];
1203 PUTU32(pt + 8, s2);
1204 s3 = m3(Td4[b3(t3)]) ^ m2(Td4[b2(t2)]) ^ m1(Td4[b1(t1)]) ^ m0(Td4[b0(t0)]) ^ rk[3];
1205 PUTU32(pt + 12, s3);
1206 }
1207
1208
1209 // public API
1210
1211
1212 int aes_ecb_decrypt (unsigned char *out, const unsigned char *in, aes_context_t *ctx) {
1213
1214 aes_internal_decrypt(ctx->dec_rk, ctx->Nr, in, out);
1215
1216 return AES_BLOCK_SIZE;
1217 }
1218
1219
1220 // not used
1221 int aes_ecb_encrypt (unsigned char *out, const unsigned char *in, aes_context_t *ctx) {
1222
1223 aes_internal_encrypt(ctx->enc_rk, ctx->Nr, in, out);
1224
1225 return AES_BLOCK_SIZE;
1226 }
1227
1228
// XOR the 16 bytes at source into the 16 bytes at target, in place;
// the work is done byte by byte so that neither buffer needs to be 32-bit aligned and no
// byte buffer gets accessed through a uint32_t pointer (which would violate strict aliasing);
// target and source are evaluated exactly once each, and the do { } while(0) wrapper makes
// the macro usable wherever a single statement is expected
#define fix_xor(target, source) do {                                          \
        uint8_t *fix_xor_dst_ = (uint8_t *)(target);                          \
        const uint8_t *fix_xor_src_ = (const uint8_t *)(source);              \
        size_t fix_xor_idx_;                                                  \
        for(fix_xor_idx_ = 0; fix_xor_idx_ < 16; fix_xor_idx_++)              \
            fix_xor_dst_[fix_xor_idx_] ^= fix_xor_src_[fix_xor_idx_];         \
    } while(0)
1231
1232
1233 int aes_cbc_encrypt (unsigned char *out, const unsigned char *in, size_t in_len,
1234 const unsigned char *iv, aes_context_t *ctx) {
1235
1236 uint8_t tmp[AES_BLOCK_SIZE];
1237 size_t i;
1238 size_t n;
1239
1240 memcpy(tmp, iv, AES_BLOCK_SIZE);
1241
1242 n = in_len / AES_BLOCK_SIZE;
1243 for(i=0; i < n; i++) {
1244 fix_xor(tmp, &in[i * AES_BLOCK_SIZE]);
1245 aes_internal_encrypt(ctx->enc_rk, ctx->Nr, tmp, tmp);
1246 memcpy(&out[i * AES_BLOCK_SIZE], tmp, AES_BLOCK_SIZE);
1247 }
1248
1249 return n * AES_BLOCK_SIZE;
1250 }
1251
1252
1253 int aes_cbc_decrypt (unsigned char *out, const unsigned char *in, size_t in_len,
1254 const unsigned char *iv, aes_context_t *ctx) {
1255
1256 uint8_t tmp[AES_BLOCK_SIZE];
1257 uint8_t old[AES_BLOCK_SIZE];
1258 size_t i;
1259 size_t n;
1260
1261 memcpy(tmp, iv, AES_BLOCK_SIZE);
1262
1263 n = in_len / AES_BLOCK_SIZE;
1264 for(i=0; i < n; i++) {
1265 memcpy(old, &in[i * AES_BLOCK_SIZE], AES_BLOCK_SIZE);
1266 aes_internal_decrypt(ctx->dec_rk, ctx->Nr, &in[i * AES_BLOCK_SIZE], &out[i * AES_BLOCK_SIZE]);
1267 fix_xor(&out[i * AES_BLOCK_SIZE], tmp);
1268 memcpy(tmp, old, AES_BLOCK_SIZE);
1269 }
1270
1271 return n * AES_BLOCK_SIZE;
1272 }
1273
1274
1275 int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) {
1276
1277 // allocate context...
1278 *ctx = (aes_context_t*) calloc(1, sizeof(aes_context_t));
1279 if(!(*ctx))
1280 return -1;
1281 // ...and fill her up:
1282
1283 // initialize data structures
1284
1285 // check key size and make key size (given in bytes) dependant settings
1286 switch(key_size) {
1287 case AES128_KEY_BYTES: // 128 bit key size
1288 break;
1289 case AES192_KEY_BYTES: // 192 bit key size
1290 break;
1291 case AES256_KEY_BYTES: // 256 bit key size
1292 break;
1293 default:
1294 traceEvent(TRACE_ERROR, "aes_init invalid key size %u\n", key_size);
1295 return -1;
1296 }
1297
1298 // key materiel handling
1299 (*ctx)->Nr = aes_internal_key_setup_enc((*ctx)->enc_rk/*[4*(Nr + 1)]*/, key, 8 * key_size);
1300 aes_internal_key_setup_dec((*ctx)->dec_rk/*[4*(Nr + 1)]*/, key, 8 * key_size);
1301 return 0;
1302 }
1303
1304
1305 #endif // openSSL 1.1, AES-NI, plain C ----------------------------------------------------------------------------
1306
1307
1308 int aes_deinit (aes_context_t *ctx) {
1309
1310 if(ctx) free(ctx);
1311
1312 return 0;
1313 }