"Fossies" - the Fresh Open Source Software Archive

Member "n2n-3.1.1/src/aes.c" (31 Mar 2022, 72993 Bytes) of package /linux/misc/n2n-3.1.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "aes.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 3.0_vs_3.1.1.

    1 /**
    2  * (C) 2007-22 - ntop.org and contributors
    3  *
    4  * This program is free software; you can redistribute it and/or modify
    5  * it under the terms of the GNU General Public License as published by
    6  * the Free Software Foundation; either version 3 of the License, or
    7  * (at your option) any later version.
    8  *
    9  * This program is distributed in the hope that it will be useful,
   10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   12  * GNU General Public License for more details.
   13  *
   14  * You should have received a copy of the GNU General Public License
   15  * along with this program; if not, see <http://www.gnu.org/licenses/>
   16  *
   17  */
   18 
   19 
   20 #include "n2n.h"
   21 
   22 
   23 #if defined (HAVE_OPENSSL_1_1) // openSSL 1.1 ---------------------------------------------------------------------
   24 
   25 
   26 // get any erorr message out of openssl
   27 // taken from https://en.wikibooks.org/wiki/OpenSSL/Error_handling
   28 static char *openssl_err_as_string (void) {
   29 
   30     BIO *bio = BIO_new (BIO_s_mem ());
   31     ERR_print_errors (bio);
   32     char *buf = NULL;
   33     size_t len = BIO_get_mem_data (bio, &buf);
   34     char *ret = (char *) calloc (1, 1 + len);
   35 
   36     if(ret)
   37         memcpy (ret, buf, len);
   38 
   39     BIO_free (bio);
   40 
   41     return ret;
   42 }
   43 
   44 
   45 int aes_cbc_encrypt (unsigned char *out, const unsigned char *in, size_t in_len,
   46                      const unsigned char *iv, aes_context_t *ctx) {
   47 
   48     int evp_len;
   49     int evp_ciphertext_len;
   50 
   51     if(1 == EVP_EncryptInit_ex(ctx->enc_ctx, ctx->cipher, NULL, ctx->key, iv)) {
   52         if(1 == EVP_CIPHER_CTX_set_padding(ctx->enc_ctx, 0)) {
   53             if(1 == EVP_EncryptUpdate(ctx->enc_ctx, out, &evp_len, in, in_len)) {
   54                 evp_ciphertext_len = evp_len;
   55                 if(1 == EVP_EncryptFinal_ex(ctx->enc_ctx, out + evp_len, &evp_len)) {
   56                     evp_ciphertext_len += evp_len;
   57                     if(evp_ciphertext_len != in_len)
   58                         traceEvent(TRACE_ERROR, "aes_cbc_encrypt openssl encryption: encrypted %u bytes where %u were expected",
   59                                                 evp_ciphertext_len, in_len);
   60                 } else
   61                     traceEvent(TRACE_ERROR, "aes_cbc_encrypt openssl final encryption: %s",
   62                                             openssl_err_as_string());
   63             } else
   64                 traceEvent(TRACE_ERROR, "aes_cbc_encrypt openssl encrpytion: %s",
   65                                         openssl_err_as_string());
   66         } else
   67             traceEvent(TRACE_ERROR, "aes_cbc_encrypt openssl padding setup: %s",
   68                                     openssl_err_as_string());
   69     } else
   70         traceEvent(TRACE_ERROR, "aes_cbc_encrypt openssl init: %s",
   71                                 openssl_err_as_string());
   72 
   73     EVP_CIPHER_CTX_reset(ctx->enc_ctx);
   74 
   75     return 0;
   76 }
   77 
   78 
   79 int aes_cbc_decrypt (unsigned char *out, const unsigned char *in, size_t in_len,
   80                      const unsigned char *iv, aes_context_t *ctx) {
   81 
   82     int evp_len;
   83     int evp_plaintext_len;
   84 
   85     if(1 == EVP_DecryptInit_ex(ctx->dec_ctx, ctx->cipher, NULL, ctx->key, iv)) {
   86         if(1 == EVP_CIPHER_CTX_set_padding(ctx->dec_ctx, 0)) {
   87             if(1 == EVP_DecryptUpdate(ctx->dec_ctx, out, &evp_len, in, in_len)) {
   88                 evp_plaintext_len = evp_len;
   89                 if(1 == EVP_DecryptFinal_ex(ctx->dec_ctx, out + evp_len, &evp_len)) {
   90                     evp_plaintext_len += evp_len;
   91                     if(evp_plaintext_len != in_len)
   92                         traceEvent(TRACE_ERROR, "aes_cbc_decrypt openssl decryption: decrypted %u bytes where %u were expected",
   93                                                 evp_plaintext_len, in_len);
   94                 } else
   95                     traceEvent(TRACE_ERROR, "aes_cbc_decrypt openssl final decryption: %s",
   96                                             openssl_err_as_string());
   97             } else
   98                 traceEvent(TRACE_ERROR, "aes_cbc_decrypt openssl decrpytion: %s",
   99                                         openssl_err_as_string());
  100         } else
  101             traceEvent(TRACE_ERROR, "aes_cbc_decrypt openssl padding setup: %s",
  102                                     openssl_err_as_string());
  103     } else
  104         traceEvent(TRACE_ERROR, "aes_cbc_decrypt openssl init: %s",
  105                                 openssl_err_as_string());
  106 
  107     EVP_CIPHER_CTX_reset(ctx->dec_ctx);
  108 
  109     return 0;
  110 }
  111 
  112 
  113 int aes_ecb_decrypt (unsigned char *out, const unsigned char *in, aes_context_t *ctx) {
  114 
  115     AES_ecb_encrypt(in, out, &(ctx->ecb_dec_key), AES_DECRYPT);
  116 
  117     return 0;
  118 }
  119 
  120 
  121 int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) {
  122 
  123     // allocate context...
  124     *ctx = (aes_context_t*) calloc(1, sizeof(aes_context_t));
  125     if(!(*ctx))
  126         return -1;
  127 
  128     // ...and fill her up:
  129 
  130     // initialize data structures
  131     if(!((*ctx)->enc_ctx = EVP_CIPHER_CTX_new())) {
  132         traceEvent(TRACE_ERROR, "aes_init openssl's evp_* encryption context creation failed: %s",
  133                                 openssl_err_as_string());
  134         return -1;
  135     }
  136 
  137     if(!((*ctx)->dec_ctx = EVP_CIPHER_CTX_new())) {
  138         traceEvent(TRACE_ERROR, "aes_init openssl's evp_* decryption context creation failed: %s",
  139                                 openssl_err_as_string());
  140         return -1;
  141     }
  142 
  143     // check key size and make key size (given in bytes) dependant settings
  144     switch(key_size) {
  145         case AES128_KEY_BYTES:    // 128 bit key size
  146             (*ctx)->cipher = EVP_aes_128_cbc();
  147             break;
  148         case AES192_KEY_BYTES:    // 192 bit key size
  149             (*ctx)->cipher = EVP_aes_192_cbc();
  150             break;
  151         case AES256_KEY_BYTES:    // 256 bit key size
  152             (*ctx)->cipher = EVP_aes_256_cbc();
  153             break;
  154         default:
  155             traceEvent(TRACE_ERROR, "aes_init invalid key size %u\n", key_size);
  156             return -1;
  157     }
  158 
  159     // key materiel handling
  160     memcpy((*ctx)->key, key, key_size);
  161     AES_set_decrypt_key(key, key_size * 8, &((*ctx)->ecb_dec_key));
  162 
  163     return 0;
  164 }
  165 
  166 
  167 #elif defined (__AES__) && defined (__SSE2__) // Intel's AES-NI ---------------------------------------------------
  168 
  169 
  170 // inspired by https://gist.github.com/acapola/d5b940da024080dfaf5f
  171 // furthered by the help of Sebastian Ramacher's implementation found at
  172 // https://chromium.googlesource.com/external/github.com/dlitz/pycrypto/+/junk/master/src/AESNI.c
  173 // modified along Intel's white paper on AES Instruction Set
  174 // https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf
  175 
  176 
  // One key expansion step: every 32-bit word of 'key' gets replaced by the XOR of itself
  // and all lower words of 'key', and the result is XORed with one word of 'keygened'
  // broadcast to all four positions.
  //
  // shuf selects the broadcast word by its shuffle pattern: 0x55 (word 1), 0xaa (word 2)
  // or 0xff (word 3); with any other value, 'keygened' is used unshuffled.
  static __m128i aes128_keyexpand(__m128i key, __m128i keygened, uint8_t shuf) {

      __m128i acc = key;
      int pass;

      // three shift-by-one-word-and-XOR passes accumulate the lower words into each word
      for(pass = 0; pass < 3; pass++)
          acc = _mm_xor_si128(acc, _mm_slli_si128(acc, 4));

      // the shuffle intrinsic wants an immediate operand, hence one explicit call per pattern
      // REVISIT: either turn this whole function (and perhaps the following one) into a macro
      //          or use shuffle_epi8 (which would require SSSE3 instead of SSE2)
      if(shuf == 0x55)
          keygened = _mm_shuffle_epi32(keygened, 0x55);
      else if(shuf == 0xaa)
          keygened = _mm_shuffle_epi32(keygened, 0xaa);
      else if(shuf == 0xff)
          keygened = _mm_shuffle_epi32(keygened, 0xff);

      return _mm_xor_si128(acc, keygened);
  }
  202 
  203 
  // Second half of a 192-bit key expansion step: broadcasts the top 32-bit word of 'key'
  // and XORs it with 'key2' combined with a copy of itself shifted up by one word.
  static __m128i aes192_keyexpand_2(__m128i key, __m128i key2) {

      const __m128i top_word = _mm_shuffle_epi32(key, 0xff);
      const __m128i shifted  = _mm_slli_si128(key2, 4);
      const __m128i folded   = _mm_xor_si128(key2, shifted);

      return _mm_xor_si128(top_word, folded);
  }
  211 
  212 
  // shorthands for the individual key expansion steps; RCON is the immediate passed on to
  // _mm_aeskeygenassist_si128, the trailing constant picks the word aes128_keyexpand broadcasts
  #define KEYEXP128(KEY, RCON)         aes128_keyexpand  (KEY,    _mm_aeskeygenassist_si128(KEY,    RCON), 0xff)
  #define KEYEXP192(KEY_A, KEY_B, RCON) aes128_keyexpand  (KEY_A,  _mm_aeskeygenassist_si128(KEY_B,  RCON), 0x55)
  #define KEYEXP192_2(KEY_A, KEY_B)     aes192_keyexpand_2(KEY_A,  KEY_B)
  #define KEYEXP256(KEY_A, KEY_B, RCON) aes128_keyexpand  (KEY_A,  _mm_aeskeygenassist_si128(KEY_B,  RCON), 0xff)
  #define KEYEXP256_2(KEY_A, KEY_B)     aes128_keyexpand  (KEY_A,  _mm_aeskeygenassist_si128(KEY_B,  0x00), 0xaa)
  218 
  219 
  220 // key setup
  221 static int aes_internal_key_setup (aes_context_t *ctx, const uint8_t *key, int key_bits) {
  222 
  223     // number of rounds
  224     ctx->Nr = 6 + (key_bits / 32);
  225 
  226     // encryption keys
  227     switch(key_bits) {
  228         case 128: {
  229             ctx->rk_enc[ 0] = _mm_loadu_si128((const __m128i*)key);
  230             ctx->rk_enc[ 1] = KEYEXP128(ctx->rk_enc[0], 0x01);
  231             ctx->rk_enc[ 2] = KEYEXP128(ctx->rk_enc[1], 0x02);
  232             ctx->rk_enc[ 3] = KEYEXP128(ctx->rk_enc[2], 0x04);
  233             ctx->rk_enc[ 4] = KEYEXP128(ctx->rk_enc[3], 0x08);
  234             ctx->rk_enc[ 5] = KEYEXP128(ctx->rk_enc[4], 0x10);
  235             ctx->rk_enc[ 6] = KEYEXP128(ctx->rk_enc[5], 0x20);
  236             ctx->rk_enc[ 7] = KEYEXP128(ctx->rk_enc[6], 0x40);
  237             ctx->rk_enc[ 8] = KEYEXP128(ctx->rk_enc[7], 0x80);
  238             ctx->rk_enc[ 9] = KEYEXP128(ctx->rk_enc[8], 0x1B);
  239             ctx->rk_enc[10] = KEYEXP128(ctx->rk_enc[9], 0x36);
  240             break;
  241         }
  242         case 192: {
  243             __m128i temp[2];
  244             ctx->rk_enc[ 0] = _mm_loadu_si128((const __m128i*) key);
  245 
  246             ctx->rk_enc[ 1] = _mm_loadu_si128((const __m128i*) (key+16));
  247             temp[0] = KEYEXP192(ctx->rk_enc[0], ctx->rk_enc[1], 0x01);
  248             temp[1] = KEYEXP192_2(temp[0], ctx->rk_enc[1]);
  249             ctx->rk_enc[ 1] = (__m128i)_mm_shuffle_pd((__m128d)ctx->rk_enc[1], (__m128d)temp[0], 0);
  250 
  251             ctx->rk_enc[ 2] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1);
  252             ctx->rk_enc[ 3] = KEYEXP192(temp[0], temp[1], 0x02);
  253 
  254             ctx->rk_enc[ 4] = KEYEXP192_2(ctx->rk_enc[3], temp[1]);
  255             temp[0] = KEYEXP192(ctx->rk_enc[3], ctx->rk_enc[4], 0x04);
  256             temp[1] = KEYEXP192_2(temp[0], ctx->rk_enc[4]);
  257             ctx->rk_enc[ 4] = (__m128i)_mm_shuffle_pd((__m128d)ctx->rk_enc[4], (__m128d)temp[0], 0);
  258 
  259             ctx->rk_enc[ 5] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1);
  260             ctx->rk_enc[ 6] = KEYEXP192(temp[0], temp[1], 0x08);
  261 
  262             ctx->rk_enc[ 7] = KEYEXP192_2(ctx->rk_enc[6], temp[1]);
  263             temp[0] = KEYEXP192(ctx->rk_enc[6], ctx->rk_enc[7], 0x10);
  264             temp[1] = KEYEXP192_2(temp[0], ctx->rk_enc[7]);
  265             ctx->rk_enc[ 7] = (__m128i)_mm_shuffle_pd((__m128d)ctx->rk_enc[7], (__m128d)temp[0], 0);
  266 
  267             ctx->rk_enc[ 8] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1);
  268             ctx->rk_enc[ 9] = KEYEXP192(temp[0], temp[1], 0x20);
  269 
  270             ctx->rk_enc[10] = KEYEXP192_2(ctx->rk_enc[9], temp[1]);
  271             temp[0] = KEYEXP192(ctx->rk_enc[9], ctx->rk_enc[10], 0x40);
  272             temp[1] = KEYEXP192_2(temp[0], ctx->rk_enc[10]);
  273             ctx->rk_enc[10] = (__m128i)_mm_shuffle_pd((__m128d)ctx->rk_enc[10], (__m128d) temp[0], 0);
  274 
  275             ctx->rk_enc[11] = (__m128i)_mm_shuffle_pd((__m128d)temp[0],(__m128d) temp[1], 1);
  276             ctx->rk_enc[12] = KEYEXP192(temp[0], temp[1], 0x80);
  277             break;
  278         }
  279         case 256: {
  280             ctx->rk_enc[ 0] = _mm_loadu_si128((const __m128i*) key);
  281             ctx->rk_enc[ 1] = _mm_loadu_si128((const __m128i*) (key+16));
  282             ctx->rk_enc[ 2] = KEYEXP256(ctx->rk_enc[0], ctx->rk_enc[1], 0x01);
  283             ctx->rk_enc[ 3] = KEYEXP256_2(ctx->rk_enc[1], ctx->rk_enc[2]);
  284             ctx->rk_enc[ 4] = KEYEXP256(ctx->rk_enc[2], ctx->rk_enc[3], 0x02);
  285             ctx->rk_enc[ 5] = KEYEXP256_2(ctx->rk_enc[3], ctx->rk_enc[4]);
  286             ctx->rk_enc[ 6] = KEYEXP256(ctx->rk_enc[4], ctx->rk_enc[5], 0x04);
  287             ctx->rk_enc[ 7] = KEYEXP256_2(ctx->rk_enc[5], ctx->rk_enc[6]);
  288             ctx->rk_enc[ 8] = KEYEXP256(ctx->rk_enc[6], ctx->rk_enc[7], 0x08);
  289             ctx->rk_enc[ 9] = KEYEXP256_2(ctx->rk_enc[7], ctx->rk_enc[8]);
  290             ctx->rk_enc[10] = KEYEXP256(ctx->rk_enc[8], ctx->rk_enc[9], 0x10);
  291             ctx->rk_enc[11] = KEYEXP256_2(ctx->rk_enc[9], ctx->rk_enc[10]);
  292             ctx->rk_enc[12] = KEYEXP256(ctx->rk_enc[10], ctx->rk_enc[11], 0x20);
  293             ctx->rk_enc[13] = KEYEXP256_2(ctx->rk_enc[11], ctx->rk_enc[12]);
  294             ctx->rk_enc[14] = KEYEXP256(ctx->rk_enc[12], ctx->rk_enc[13], 0x40);
  295             break;
  296         }
  297     }
  298 
  299     // derive decryption keys
  300     for(int i = 1; i < ctx->Nr; ++i) {
  301         ctx->rk_dec[ctx->Nr - i] = _mm_aesimc_si128(ctx->rk_enc[i]);
  302     }
  303     ctx->rk_dec[ 0] = ctx->rk_enc[ctx->Nr];
  304 
  305     return ctx->Nr;
  306 }
  307 
  308 
  309 static void aes_internal_encrypt (aes_context_t *ctx, const uint8_t pt[16], uint8_t ct[16]) {
  310 
  311     __m128i tmp = _mm_loadu_si128((__m128i*)pt);
  312 
  313     tmp = _mm_xor_si128           (tmp, ctx->rk_enc[ 0]);
  314     tmp = _mm_aesenc_si128        (tmp, ctx->rk_enc[ 1]);
  315     tmp = _mm_aesenc_si128        (tmp, ctx->rk_enc[ 2]);
  316     tmp = _mm_aesenc_si128        (tmp, ctx->rk_enc[ 3]);
  317     tmp = _mm_aesenc_si128        (tmp, ctx->rk_enc[ 4]);
  318     tmp = _mm_aesenc_si128        (tmp, ctx->rk_enc[ 5]);
  319     tmp = _mm_aesenc_si128        (tmp, ctx->rk_enc[ 6]);
  320     tmp = _mm_aesenc_si128        (tmp, ctx->rk_enc[ 7]);
  321     tmp = _mm_aesenc_si128        (tmp, ctx->rk_enc[ 8]);
  322     tmp = _mm_aesenc_si128        (tmp, ctx->rk_enc[ 9]);
  323     if(ctx->Nr > 10) {
  324         tmp = _mm_aesenc_si128    (tmp, ctx->rk_enc[10]);
  325         tmp = _mm_aesenc_si128    (tmp, ctx->rk_enc[11]);
  326         if(ctx->Nr > 12) {
  327             tmp = _mm_aesenc_si128(tmp, ctx->rk_enc[12]);
  328             tmp = _mm_aesenc_si128(tmp, ctx->rk_enc[13]);
  329         }
  330     }
  331     tmp = _mm_aesenclast_si128    (tmp, ctx->rk_enc[ctx->Nr]);
  332 
  333     _mm_storeu_si128((__m128i*) ct, tmp);
  334 }
  335 
  336 
  337 static void aes_internal_decrypt (aes_context_t *ctx, const uint8_t ct[16], uint8_t pt[16]) {
  338 
  339     __m128i tmp = _mm_loadu_si128((__m128i*)ct);
  340 
  341     tmp = _mm_xor_si128           (tmp, ctx->rk_dec[ 0]);
  342     tmp = _mm_aesdec_si128        (tmp, ctx->rk_dec[ 1]);
  343     tmp = _mm_aesdec_si128        (tmp, ctx->rk_dec[ 2]);
  344     tmp = _mm_aesdec_si128        (tmp, ctx->rk_dec[ 3]);
  345     tmp = _mm_aesdec_si128        (tmp, ctx->rk_dec[ 4]);
  346     tmp = _mm_aesdec_si128        (tmp, ctx->rk_dec[ 5]);
  347     tmp = _mm_aesdec_si128        (tmp, ctx->rk_dec[ 6]);
  348     tmp = _mm_aesdec_si128        (tmp, ctx->rk_dec[ 7]);
  349     tmp = _mm_aesdec_si128        (tmp, ctx->rk_dec[ 8]);
  350     tmp = _mm_aesdec_si128        (tmp, ctx->rk_dec[ 9]);
  351     if(ctx->Nr > 10) {
  352         tmp = _mm_aesdec_si128    (tmp, ctx->rk_dec[10]);
  353         tmp = _mm_aesdec_si128    (tmp, ctx->rk_dec[11]);
  354         if(ctx->Nr > 12) {
  355             tmp = _mm_aesdec_si128(tmp, ctx->rk_dec[12]);
  356             tmp = _mm_aesdec_si128(tmp, ctx->rk_dec[13]);
  357         }
  358     }
  359     tmp = _mm_aesdeclast_si128    (tmp, ctx->rk_enc[ 0]);
  360 
  361     _mm_storeu_si128((__m128i*) pt, tmp);
  362 }
  363 
  364 
  365 // public API
  366 
  367 
  368 int aes_ecb_decrypt (unsigned char *out, const unsigned char *in, aes_context_t *ctx) {
  369 
  370     aes_internal_decrypt(ctx, in, out);
  371 
  372     return AES_BLOCK_SIZE;
  373 }
  374 
  375 
  376 // not used
  377 int aes_ecb_encrypt (unsigned char *out, const unsigned char *in, aes_context_t *ctx) {
  378 
  379     aes_internal_encrypt(ctx, in, out);
  380 
  381     return AES_BLOCK_SIZE;
  382 }
  383 
  384 
  385 int aes_cbc_encrypt (unsigned char *out, const unsigned char *in, size_t in_len,
  386                      const unsigned char *iv, aes_context_t *ctx) {
  387 
  388     int n;                       /* number of blocks */
  389     int ret = (int)in_len & 15;  /* remainder        */
  390 
  391     __m128i ivec = _mm_loadu_si128((__m128i*)iv);
  392 
  393     for(n = in_len / 16; n != 0; n--) {
  394         __m128i tmp = _mm_loadu_si128((__m128i*)in);
  395         in += 16;
  396         tmp = _mm_xor_si128(tmp, ivec);
  397 
  398         tmp = _mm_xor_si128           (tmp, ctx->rk_enc[ 0]);
  399         tmp = _mm_aesenc_si128        (tmp, ctx->rk_enc[ 1]);
  400         tmp = _mm_aesenc_si128        (tmp, ctx->rk_enc[ 2]);
  401         tmp = _mm_aesenc_si128        (tmp, ctx->rk_enc[ 3]);
  402         tmp = _mm_aesenc_si128        (tmp, ctx->rk_enc[ 4]);
  403         tmp = _mm_aesenc_si128        (tmp, ctx->rk_enc[ 5]);
  404         tmp = _mm_aesenc_si128        (tmp, ctx->rk_enc[ 6]);
  405         tmp = _mm_aesenc_si128        (tmp, ctx->rk_enc[ 7]);
  406         tmp = _mm_aesenc_si128        (tmp, ctx->rk_enc[ 8]);
  407         tmp = _mm_aesenc_si128        (tmp, ctx->rk_enc[ 9]);
  408         if(ctx->Nr > 10) {
  409             tmp = _mm_aesenc_si128    (tmp, ctx->rk_enc[10]);
  410             tmp = _mm_aesenc_si128    (tmp, ctx->rk_enc[11]);
  411             if(ctx->Nr > 12) {
  412                 tmp = _mm_aesenc_si128(tmp, ctx->rk_enc[12]);
  413                 tmp = _mm_aesenc_si128(tmp, ctx->rk_enc[13]);
  414             }
  415         }
  416         tmp = _mm_aesenclast_si128    (tmp, ctx->rk_enc[ctx->Nr]);
  417 
  418         ivec = tmp;
  419 
  420         _mm_storeu_si128((__m128i*)out, tmp);
  421         out += 16;
  422     }
  423 
  424     return ret;
  425 }
  426 
  427 
  428 int aes_cbc_decrypt (unsigned char *out, const unsigned char *in, size_t in_len,
  429                      const unsigned char *iv, aes_context_t *ctx) {
  430 
  431     int n;                       /* number of blocks */
  432     int ret = (int)in_len & 15;  /* remainder        */
  433 
  434     __m128i ivec = _mm_loadu_si128((__m128i*)iv);
  435 
  436     // 4 parallel rails of AES decryption to reduce data dependencies in x86's deep pipelines
  437     for(n = in_len / 16; n > 3; n -=4) {
  438         __m128i tmp1 = _mm_loadu_si128((__m128i*)in); in += 16;
  439         __m128i tmp2 = _mm_loadu_si128((__m128i*)in); in += 16;
  440         __m128i tmp3 = _mm_loadu_si128((__m128i*)in); in += 16;
  441         __m128i tmp4 = _mm_loadu_si128((__m128i*)in); in += 16;
  442 
  443         __m128i old_in1 = tmp1;
  444         __m128i old_in2 = tmp2;
  445         __m128i old_in3 = tmp3;
  446         __m128i old_in4 = tmp4;
  447 
  448         tmp1 = _mm_xor_si128           (tmp1, ctx->rk_dec[ 0]); tmp2 = _mm_xor_si128       (tmp2, ctx->rk_dec[ 0]);
  449         tmp3 = _mm_xor_si128           (tmp3, ctx->rk_dec[ 0]); tmp4 = _mm_xor_si128       (tmp4, ctx->rk_dec[ 0]);
  450 
  451         tmp1 = _mm_aesdec_si128        (tmp1, ctx->rk_dec[ 1]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[ 1]);
  452         tmp3 = _mm_aesdec_si128        (tmp3, ctx->rk_dec[ 1]); tmp4 = _mm_aesdec_si128    (tmp4, ctx->rk_dec[ 1]);
  453 
  454         tmp1 = _mm_aesdec_si128        (tmp1, ctx->rk_dec[ 2]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[ 2]);
  455         tmp3 = _mm_aesdec_si128        (tmp3, ctx->rk_dec[ 2]); tmp4 = _mm_aesdec_si128    (tmp4, ctx->rk_dec[ 2]);
  456 
  457         tmp1 = _mm_aesdec_si128        (tmp1, ctx->rk_dec[ 3]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[ 3]);
  458         tmp3 = _mm_aesdec_si128        (tmp3, ctx->rk_dec[ 3]); tmp4 = _mm_aesdec_si128    (tmp4, ctx->rk_dec[ 3]);
  459 
  460         tmp1 = _mm_aesdec_si128        (tmp1, ctx->rk_dec[ 4]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[ 4]);
  461         tmp3 = _mm_aesdec_si128        (tmp3, ctx->rk_dec[ 4]); tmp4 = _mm_aesdec_si128    (tmp4, ctx->rk_dec[ 4]);
  462 
  463         tmp1 = _mm_aesdec_si128        (tmp1, ctx->rk_dec[ 5]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[ 5]);
  464         tmp3 = _mm_aesdec_si128        (tmp3, ctx->rk_dec[ 5]); tmp4 = _mm_aesdec_si128    (tmp4, ctx->rk_dec[ 5]);
  465 
  466         tmp1 = _mm_aesdec_si128        (tmp1, ctx->rk_dec[ 6]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[ 6]);
  467         tmp3 = _mm_aesdec_si128        (tmp3, ctx->rk_dec[ 6]); tmp4 = _mm_aesdec_si128    (tmp4, ctx->rk_dec[ 6]);
  468 
  469         tmp1 = _mm_aesdec_si128        (tmp1, ctx->rk_dec[ 7]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[ 7]);
  470         tmp3 = _mm_aesdec_si128        (tmp3, ctx->rk_dec[ 7]); tmp4 = _mm_aesdec_si128    (tmp4, ctx->rk_dec[ 7]);
  471 
  472         tmp1 = _mm_aesdec_si128        (tmp1, ctx->rk_dec[ 8]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[ 8]);
  473         tmp3 = _mm_aesdec_si128        (tmp3, ctx->rk_dec[ 8]); tmp4 = _mm_aesdec_si128    (tmp4, ctx->rk_dec[ 8]);
  474 
  475         tmp1 = _mm_aesdec_si128        (tmp1, ctx->rk_dec[ 9]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[ 9]);
  476         tmp3 = _mm_aesdec_si128        (tmp3, ctx->rk_dec[ 9]); tmp4 = _mm_aesdec_si128    (tmp4, ctx->rk_dec[ 9]);
  477 
  478         if(ctx->Nr > 10) {
  479             tmp1 = _mm_aesdec_si128    (tmp1, ctx->rk_dec[10]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[10]);
  480             tmp3 = _mm_aesdec_si128    (tmp3, ctx->rk_dec[10]); tmp4 = _mm_aesdec_si128    (tmp4, ctx->rk_dec[10]);
  481 
  482             tmp1 = _mm_aesdec_si128    (tmp1, ctx->rk_dec[11]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[11]);
  483             tmp3 = _mm_aesdec_si128    (tmp3, ctx->rk_dec[11]); tmp4 = _mm_aesdec_si128    (tmp4, ctx->rk_dec[11]);
  484 
  485             if(ctx->Nr > 12) {
  486                 tmp1 = _mm_aesdec_si128(tmp1, ctx->rk_dec[12]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[12]);
  487                 tmp3 = _mm_aesdec_si128(tmp3, ctx->rk_dec[12]); tmp4 = _mm_aesdec_si128    (tmp4, ctx->rk_dec[12]);
  488 
  489                 tmp1 = _mm_aesdec_si128(tmp1, ctx->rk_dec[13]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[13]);
  490                 tmp3 = _mm_aesdec_si128(tmp3, ctx->rk_dec[13]); tmp4 = _mm_aesdec_si128    (tmp4, ctx->rk_dec[13]);
  491             }
  492         }
  493         tmp1 =     _mm_aesdeclast_si128(tmp1, ctx->rk_enc[ 0]); tmp2 = _mm_aesdeclast_si128(tmp2, ctx->rk_enc[ 0]);
  494         tmp3 =     _mm_aesdeclast_si128(tmp3, ctx->rk_enc[ 0]); tmp4 = _mm_aesdeclast_si128(tmp4, ctx->rk_enc[ 0]);
  495 
  496         tmp1 = _mm_xor_si128 (tmp1, ivec); tmp2 = _mm_xor_si128 (tmp2, old_in1);
  497         tmp3 = _mm_xor_si128 (tmp3, old_in2); tmp4 = _mm_xor_si128 (tmp4, old_in3);
  498 
  499         ivec = old_in4;
  500 
  501         _mm_storeu_si128((__m128i*) out, tmp1); out += 16;
  502         _mm_storeu_si128((__m128i*) out, tmp2); out += 16;
  503         _mm_storeu_si128((__m128i*) out, tmp3); out += 16;
  504         _mm_storeu_si128((__m128i*) out, tmp4); out += 16;
  505     }
  506     // now: less than 4 blocks remaining
  507 
  508     // if 2 or 3 blocks remaining --> this code handles two of them
  509     if(n > 1) {
  510         n-= 2;
  511 
  512         __m128i tmp1 = _mm_loadu_si128((__m128i*)in); in += 16;
  513         __m128i tmp2 = _mm_loadu_si128((__m128i*)in); in += 16;
  514 
  515         __m128i old_in1 = tmp1;
  516         __m128i old_in2 = tmp2;
  517 
  518         tmp1 = _mm_xor_si128           (tmp1, ctx->rk_dec[ 0]); tmp2 = _mm_xor_si128       (tmp2, ctx->rk_dec[ 0]);
  519         tmp1 = _mm_aesdec_si128        (tmp1, ctx->rk_dec[ 1]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[ 1]);
  520         tmp1 = _mm_aesdec_si128        (tmp1, ctx->rk_dec[ 2]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[ 2]);
  521         tmp1 = _mm_aesdec_si128        (tmp1, ctx->rk_dec[ 3]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[ 3]);
  522         tmp1 = _mm_aesdec_si128        (tmp1, ctx->rk_dec[ 4]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[ 4]);
  523         tmp1 = _mm_aesdec_si128        (tmp1, ctx->rk_dec[ 5]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[ 5]);
  524         tmp1 = _mm_aesdec_si128        (tmp1, ctx->rk_dec[ 6]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[ 6]);
  525         tmp1 = _mm_aesdec_si128        (tmp1, ctx->rk_dec[ 7]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[ 7]);
  526         tmp1 = _mm_aesdec_si128        (tmp1, ctx->rk_dec[ 8]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[ 8]);
  527         tmp1 = _mm_aesdec_si128        (tmp1, ctx->rk_dec[ 9]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[ 9]);
  528         if(ctx->Nr > 10) {
  529             tmp1 = _mm_aesdec_si128    (tmp1, ctx->rk_dec[10]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[10]);
  530             tmp1 = _mm_aesdec_si128    (tmp1, ctx->rk_dec[11]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[11]);
  531             if(ctx->Nr > 12) {
  532                 tmp1 = _mm_aesdec_si128(tmp1, ctx->rk_dec[12]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[12]);
  533                 tmp1 = _mm_aesdec_si128(tmp1, ctx->rk_dec[13]); tmp2 = _mm_aesdec_si128    (tmp2, ctx->rk_dec[13]);
  534             }
  535         }
  536         tmp1 = _mm_aesdeclast_si128    (tmp1, ctx->rk_enc[ 0]); tmp2 = _mm_aesdeclast_si128(tmp2, ctx->rk_enc[ 0]);
  537 
  538         tmp1 = _mm_xor_si128 (tmp1, ivec); tmp2 = _mm_xor_si128 (tmp2, old_in1);
  539 
  540         ivec = old_in2;
  541 
  542         _mm_storeu_si128((__m128i*) out, tmp1); out += 16;
  543         _mm_storeu_si128((__m128i*) out, tmp2); out += 16;
  544     }
  545 
  546     // one block remaining
  547     if(n) {
  548         __m128i tmp = _mm_loadu_si128((__m128i*)in);
  549 
  550         tmp = _mm_xor_si128           (tmp, ctx->rk_dec[ 0]);
  551         tmp = _mm_aesdec_si128        (tmp, ctx->rk_dec[ 1]);
  552         tmp = _mm_aesdec_si128        (tmp, ctx->rk_dec[ 2]);
  553         tmp = _mm_aesdec_si128        (tmp, ctx->rk_dec[ 3]);
  554         tmp = _mm_aesdec_si128        (tmp, ctx->rk_dec[ 4]);
  555         tmp = _mm_aesdec_si128        (tmp, ctx->rk_dec[ 5]);
  556         tmp = _mm_aesdec_si128        (tmp, ctx->rk_dec[ 6]);
  557         tmp = _mm_aesdec_si128        (tmp, ctx->rk_dec[ 7]);
  558         tmp = _mm_aesdec_si128        (tmp, ctx->rk_dec[ 8]);
  559         tmp = _mm_aesdec_si128        (tmp, ctx->rk_dec[ 9]);
  560         if(ctx->Nr > 10) {
  561             tmp = _mm_aesdec_si128    (tmp, ctx->rk_dec[10]);
  562             tmp = _mm_aesdec_si128    (tmp, ctx->rk_dec[11]);
  563             if(ctx->Nr > 12) {
  564                 tmp = _mm_aesdec_si128(tmp, ctx->rk_dec[12]);
  565                 tmp = _mm_aesdec_si128(tmp, ctx->rk_dec[13]);
  566             }
  567         }
  568         tmp = _mm_aesdeclast_si128    (tmp, ctx->rk_enc[ 0]);
  569 
  570         tmp = _mm_xor_si128 (tmp, ivec);
  571 
  572         _mm_storeu_si128((__m128i*) out, tmp);
  573     }
  574 
  575     return ret;
  576 }
  577 
  578 
  579 int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) {
  580 
  581     // allocate context...
  582     *ctx = (aes_context_t*) calloc(1, sizeof(aes_context_t));
  583     if(!(*ctx))
  584         return -1;
  585     // ...and fill her up:
  586 
  587     // initialize data structures
  588 
  589     // check key size and make key size (given in bytes) dependant settings
  590     switch(key_size) {
  591         case AES128_KEY_BYTES:    // 128 bit key size
  592             break;
  593         case AES192_KEY_BYTES:    // 192 bit key size
  594             break;
  595         case AES256_KEY_BYTES:    // 256 bit key size
  596             break;
  597         default:
  598             traceEvent(TRACE_ERROR, "aes_init invalid key size %u\n", key_size);
  599             return -1;
  600     }
  601 
  602     // key materiel handling
  603     aes_internal_key_setup ( *ctx, key, 8 * key_size);
  604 
  605     return 0;
  606 }
  607 
  608 
  609 #else // plain C --------------------------------------------------------------------------
  610 
  611 
  612 // rijndael-alg-fst.c version 3.0 (December 2000)
  613 // optimised ANSI C code for the Rijndael cipher (now AES)
  614 // original authors: Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
  615 //                   Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
  616 //                   Paulo Barreto <paulo.barreto@terra.com.br>
  617 //
  618 // was put in the public domain, taken (and modified) from
  619 // https://fastcrypto.org/front/misc/rijndael-alg-fst.c
  620 
  621 
  622 // Te0[x] = S [x].[02, 01, 01, 03];
      // Encryption lookup table 0, indexed by one input byte x (256 entries).
      // Each word packs, from most- to least-significant byte, the S-box value S[x]
      // multiplied in GF(2^8) by 02, 01, 01 and 03
      // (e.g. x = 0: S[0] = 0x63 gives 0xc6, 0x63, 0x63, 0xa5 -> 0xc66363a5).
      // NOTE(review): the round code consuming this table is outside this excerpt -- confirm usage there.
  623 static const uint32_t Te0[256] = {
  624     0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU, 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
  625     0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU, 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
  626     0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U, 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
  627     0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU, 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
  628     0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU, 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
  629     0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U, 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
  630     0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU, 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
  631     0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU, 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
  632     0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU, 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
  633     0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU, 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
  634     0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU, 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
  635     0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU, 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
  636     0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U, 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
  637     0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U, 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
  638     0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU, 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
  639     0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U, 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
  640     0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU, 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
  641     0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U, 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
  642     0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU, 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
  643     0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU, 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
  644     0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU, 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
  645     0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U, 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
  646     0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U, 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
  647     0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U, 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
  648     0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U, 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
  649     0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U, 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
  650     0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU, 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
  651     0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U, 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
  652     0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U, 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
  653     0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U, 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
  654     0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U, 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
  655     0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U, 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU };
  656 
  657 // Te1[x] = S [x].[03, 02, 01, 01];
      // Encryption lookup table 1 (256 entries): the S-box value S[x] times
      // 03, 02, 01, 01 in GF(2^8), packed most-significant byte first.
      // Entry for entry this is Te0[x] rotated right by 8 bits
      // (e.g. Te0[0] = 0xc66363a5 -> Te1[0] = 0xa5c66363).
  658 static const uint32_t Te1[256] = {
  659     0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU, 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
  660     0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU, 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
  661     0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU, 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
  662     0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU, 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
  663     0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U, 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
  664     0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U, 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
  665     0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U, 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
  666     0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U, 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
  667     0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU, 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
  668     0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U, 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
  669     0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU, 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
  670     0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U, 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
  671     0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU, 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
  672     0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU, 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
  673     0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU, 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
  674     0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U, 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
  675     0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU, 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
  676     0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU, 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
  677     0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU, 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
  678     0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U, 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
  679     0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU, 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
  680     0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U, 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
  681     0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU, 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
  682     0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU, 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
  683     0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU, 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
  684     0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU, 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
  685     0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U, 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
  686     0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U, 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
  687     0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U, 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
  688     0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U, 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
  689     0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU, 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
  690     0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU, 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U };
  691 
  692 // Te2[x] = S [x].[01, 03, 02, 01];
      // Encryption lookup table 2 (256 entries): the S-box value S[x] times
      // 01, 03, 02, 01 in GF(2^8), packed most-significant byte first.
      // Entry for entry this is Te0[x] rotated right by 16 bits
      // (e.g. Te0[0] = 0xc66363a5 -> Te2[0] = 0x63a5c663).
  693 static const uint32_t Te2[256] = {
  694     0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU, 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
  695     0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU, 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
  696     0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU, 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
  697     0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU, 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
  698     0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U, 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
  699     0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U, 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
  700     0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U, 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
  701     0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U, 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
  702     0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU, 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
  703     0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U, 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
  704     0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU, 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
  705     0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U, 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
  706     0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU, 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
  707     0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU, 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
  708     0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU, 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
  709     0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U, 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
  710     0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU, 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
  711     0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU, 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
  712     0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU, 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
  713     0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U, 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
  714     0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU, 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
  715     0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U, 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
  716     0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU, 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
  717     0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU, 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
  718     0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU, 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
  719     0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU, 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
  720     0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U, 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
  721     0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U, 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
  722     0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U, 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
  723     0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U, 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
  724     0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU, 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
  725     0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU, 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U };
  726 
  727 // Te3[x] = S [x].[01, 01, 03, 02];
      // Encryption lookup table 3 (256 entries): the S-box value S[x] times
      // 01, 01, 03, 02 in GF(2^8), packed most-significant byte first.
      // Entry for entry this is Te0[x] rotated right by 24 bits
      // (e.g. Te0[0] = 0xc66363a5 -> Te3[0] = 0x6363a5c6).
  728 static const uint32_t Te3[256] = {
  729     0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U, 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
  730     0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U, 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
  731     0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU, 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
  732     0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U, 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
  733     0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU, 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
  734     0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U, 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
  735     0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU, 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
  736     0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU, 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
  737     0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U, 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
  738     0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU, 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
  739     0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U, 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
  740     0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U, 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
  741     0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU, 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
  742     0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU, 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
  743     0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U, 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
  744     0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U, 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
  745     0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U, 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
  746     0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU, 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
  747     0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U, 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
  748     0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U, 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
  749     0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U, 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
  750     0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U, 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
  751     0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU, 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
  752     0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU, 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
  753     0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU, 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
  754     0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU, 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
  755     0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU, 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
  756     0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U, 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
  757     0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U, 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
  758     0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U, 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
  759     0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU, 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
  760     0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU, 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU };
  761 
  762 // Te4[x] = S [x].[01, 01, 01, 01];
      // Encryption lookup table 4 (256 entries): the plain S-box value S[x]
      // replicated into all four bytes of the word (e.g. S[0] = 0x63 -> 0x63636363).
      // NOTE(review): presumably callers mask out the byte lane they need (key expansion /
      // final round in the reference code) -- the consuming code is outside this excerpt, confirm there.
  763 static const uint32_t Te4[256] = {
  764     0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU, 0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U,
  765     0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU, 0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U,
  766     0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU, 0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U,
  767     0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU, 0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U,
  768     0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U, 0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU,
  769     0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U, 0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U,
  770     0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U, 0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU,
  771     0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U, 0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U,
  772     0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU, 0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U,
  773     0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U, 0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U,
  774     0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU, 0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU,
  775     0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U, 0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU,
  776     0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU, 0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U,
  777     0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU, 0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U,
  778     0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU, 0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U,
  779     0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U, 0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U,
  780     0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU, 0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U,
  781     0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU, 0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U,
  782     0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU, 0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U,
  783     0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U, 0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU,
  784     0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU, 0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU,
  785     0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U, 0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U,
  786     0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU, 0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U,
  787     0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU, 0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U,
  788     0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU, 0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U,
  789     0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU, 0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU,
  790     0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U, 0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU,
  791     0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U, 0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU,
  792     0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U, 0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U,
  793     0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U, 0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU,
  794     0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU, 0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U,
  795     0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU, 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U };
  796 
  797 // Td0[x] = Si[x].[0e, 09, 0d, 0b];
      // Decryption lookup table 0, indexed by one input byte x (256 entries).
      // Si is the inverse S-box; each word packs, from most- to least-significant byte,
      // Si[x] multiplied in GF(2^8) by 0e, 09, 0d and 0b
      // (e.g. x = 0: Si[0] = 0x52 gives 0x51, 0xf4, 0xa7, 0x50 -> 0x51f4a750).
      // NOTE(review): the round code consuming this table is outside this excerpt -- confirm usage there.
  798 static const uint32_t Td0[256] = {
  799     0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U, 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
  800     0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U, 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
  801     0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U, 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
  802     0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU, 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
  803     0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU, 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
  804     0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U, 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
  805     0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U, 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
  806     0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U, 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
  807     0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U, 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
  808     0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U, 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
  809     0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U, 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
  810     0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U, 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
  811     0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U, 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
  812     0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U, 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
  813     0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU, 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
  814     0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU, 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
  815     0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU, 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
  816     0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U, 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
  817     0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU, 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
  818     0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U, 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
  819     0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U, 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
  820     0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U, 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
  821     0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U, 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
  822     0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U, 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
  823     0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U, 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
  824     0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U, 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
  825     0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU, 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
  826     0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU, 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
  827     0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U, 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
  828     0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU, 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
  829     0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU, 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
  830     0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U, 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U };
  831 
  832 // Td1[x] = Si[x].[0b, 0e, 09, 0d];
      // Decryption lookup table 1 (256 entries): the inverse S-box value Si[x] times
      // 0b, 0e, 09, 0d in GF(2^8), packed most-significant byte first.
      // Entry for entry this is Td0[x] rotated right by 8 bits
      // (e.g. Td0[0] = 0x51f4a750 -> Td1[0] = 0x5051f4a7).
  833 static const uint32_t Td1[256] = {
  834     0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU, 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
  835     0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU, 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
  836     0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U, 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
  837     0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U, 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
  838     0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U, 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
  839     0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU, 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
  840     0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U, 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
  841     0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U, 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
  842     0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U, 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
  843     0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU, 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
  844     0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU, 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
  845     0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU, 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
  846     0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U, 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
  847     0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U, 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
  848     0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U, 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
  849     0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U, 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
  850     0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U, 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
  851     0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U, 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
  852     0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U, 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
  853     0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U, 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
  854     0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU, 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
  855     0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U, 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
  856     0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U, 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
  857     0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU, 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
  858     0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU, 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
  859     0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U, 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
  860     0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U, 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
  861     0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U, 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
  862     0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU, 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
  863     0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U, 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
  864     0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U, 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
  865     0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U, 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U };
  866 
  867 // Td2[x] = Si[x].[0d, 0b, 0e, 09];
      // Decryption lookup table 2 (256 entries): the inverse S-box value Si[x] times
      // 0d, 0b, 0e, 09 in GF(2^8), packed most-significant byte first.
      // Entry for entry this is Td0[x] rotated right by 16 bits
      // (e.g. Td0[0] = 0x51f4a750 -> Td2[0] = 0xa75051f4).
  868 static const uint32_t Td2[256] = {
  869     0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U, 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
  870     0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U, 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
  871     0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU, 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
  872     0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U, 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
  873     0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U, 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
  874     0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U, 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
  875     0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU, 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
  876     0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U, 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
  877     0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U, 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
  878     0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U, 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
  879     0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U, 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
  880     0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U, 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
  881     0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U, 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
  882     0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU, 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
  883     0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU, 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
  884     0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU, 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
  885     0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU, 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
  886     0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U, 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
  887     0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U, 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
  888     0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U, 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
  889     0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U, 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
  890     0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU, 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
  891     0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U, 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
  892     0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU, 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
  893     0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U, 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
  894     0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U, 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
  895     0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U, 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
  896     0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU, 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
  897     0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U, 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
  898     0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U, 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
  899     0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U, 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
  900     0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U, 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U };
  901 
  902 // Td3[x] = Si[x].[09, 0d, 0b, 0e];
      // Decryption lookup table 3 (256 entries): the inverse S-box value Si[x] times
      // 09, 0d, 0b, 0e in GF(2^8), packed most-significant byte first.
      // Entry for entry this is Td0[x] rotated right by 24 bits
      // (e.g. Td0[0] = 0x51f4a750 -> Td3[0] = 0xf4a75051).
  903 static const uint32_t Td3[256] = {
  904     0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU, 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
  905     0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U, 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
  906     0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU, 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
  907     0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U, 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
  908     0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U, 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
  909     0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U, 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
  910     0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U, 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
  911     0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U, 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
  912     0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU, 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
  913     0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U, 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
  914     0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU, 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
  915     0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U, 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
  916     0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U, 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
  917     0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U, 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
  918     0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU, 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
  919     0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U, 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
  920     0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U, 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
  921     0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U, 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
  922     0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U, 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
  923     0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U, 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
  924     0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U, 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
  925     0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U, 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
  926     0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU, 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
  927     0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U, 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
  928     0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U, 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
  929     0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U, 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
  930     0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U, 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
  931     0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU, 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
  932     0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU, 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
  933     0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U, 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
  934     0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U, 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
  935     0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U, 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U };
  936 
// Td4[x] = Si[x].[01, 01, 01, 01];
// i.e. every entry repeats one byte Si[x] in all four byte positions of the word; the last
// decryption round picks the byte position it needs by masking an entry with m0()..m3()
static const uint32_t Td4[256] = {
    0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U, 0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U,
    0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU, 0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU,
    0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U, 0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U,
    0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U, 0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU,
    0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U, 0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU,
    0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU, 0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU,
    0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U, 0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U,
    0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U, 0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U,
    0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U, 0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U,
    0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU, 0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U,
    0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U, 0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU,
    0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U, 0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U,
    0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U, 0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU,
    0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U, 0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U,
    0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU, 0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U,
    0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U, 0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU,
    0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U, 0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU,
    0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU, 0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U,
    0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U, 0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U,
    0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U, 0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU,
    0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U, 0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U,
    0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU, 0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU,
    0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU, 0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U,
    0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU, 0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U,
    0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U, 0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U,
    0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U, 0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU,
    0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U, 0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU,
    0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU, 0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU,
    0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU, 0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U,
    0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU, 0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U,
    0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU, 0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U,
    0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U, 0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU };
  971 
  972 // for 128-bit blocks, Rijndael never uses more than 10 rcon values
  973 static const uint32_t rcon[] = {
  974     0x01000000, 0x02000000, 0x04000000, 0x08000000,
  975     0x10000000, 0x20000000, 0x40000000, 0x80000000,
  976     0x1B000000, 0x36000000 };
  977 
  978 
  979 #define GETU32(p) (be32toh((*((uint32_t*)(p)))))
  980 #define PUTU32(ct, st) { *((uint32_t*)(ct)) = htobe32((st)); }
  981 
  982 #define b0(x) ((uint8_t)(x))
  983 #define b1(x) ((uint8_t)((x) >> 8))
  984 #define b2(x) ((uint8_t)((x) >> 16))
  985 #define b3(x) ((uint8_t)((x) >> 24))
  986 
  987 #define m0(x) ((x) & 0x000000ff)
  988 #define m1(x) ((x) & 0x0000ff00)
  989 #define m2(x) ((x) & 0x00ff0000)
  990 #define m3(x) ((x) & 0xff000000)
  991 
  992 
  993 // expand the cipher key into the encryption key schedule and
  994 // return the number of rounds for the given cipher key size
  995 static int aes_internal_key_setup_enc (uint32_t rk[/*4*(Nr + 1)*/], const uint8_t cipherKey[], int keyBits) {
  996 
  997     int i = 0;
  998     uint32_t temp;
  999 
 1000     rk[0] = GETU32(cipherKey     );
 1001     rk[1] = GETU32(cipherKey +  4);
 1002     rk[2] = GETU32(cipherKey +  8);
 1003     rk[3] = GETU32(cipherKey + 12);
 1004     if(keyBits == 128) {
 1005         for(;;) {
 1006             temp  = rk[3];
 1007             rk[4] = rk[0] ^
 1008                     (Te4[b2(temp)] & 0xff000000) ^
 1009                     (Te4[b1(temp)] & 0x00ff0000) ^
 1010                     (Te4[b0(temp)] & 0x0000ff00) ^
 1011                     (Te4[b3(temp)] & 0x000000ff) ^
 1012                     rcon[i];
 1013             rk[5] = rk[1] ^ rk[4];
 1014             rk[6] = rk[2] ^ rk[5];
 1015             rk[7] = rk[3] ^ rk[6];
 1016             if(++i == 10) {
 1017                 return 10;
 1018             }
 1019             rk += 4;
 1020         }
 1021     }
 1022     rk[4] = GETU32(cipherKey + 16);
 1023     rk[5] = GETU32(cipherKey + 20);
 1024     if(keyBits == 192) {
 1025         for(;;) {
 1026             temp = rk[ 5];
 1027             rk[ 6] = rk[ 0] ^
 1028                     (Te4[b2(temp)] & 0xff000000) ^
 1029                     (Te4[b1(temp)] & 0x00ff0000) ^
 1030                     (Te4[b0(temp)] & 0x0000ff00) ^
 1031                     (Te4[b3(temp)] & 0x000000ff) ^
 1032                     rcon[i];
 1033             rk[ 7] = rk[ 1] ^ rk[ 6];
 1034             rk[ 8] = rk[ 2] ^ rk[ 7];
 1035             rk[ 9] = rk[ 3] ^ rk[ 8];
 1036             if(++i == 8) {
 1037                 return 12;
 1038             }
 1039             rk[10] = rk[ 4] ^ rk[ 9];
 1040             rk[11] = rk[ 5] ^ rk[10];
 1041             rk += 6;
 1042         }
 1043     }
 1044     rk[6] = GETU32(cipherKey + 24);
 1045     rk[7] = GETU32(cipherKey + 28);
 1046     if(keyBits == 256) {
 1047         for(;;) {
 1048             temp = rk[ 7];
 1049             rk[ 8] = rk[ 0] ^
 1050                     (Te4[b2(temp)] & 0xff000000) ^
 1051                     (Te4[b1(temp)] & 0x00ff0000) ^
 1052                     (Te4[b0(temp)] & 0x0000ff00) ^
 1053                     (Te4[b3(temp)] & 0x000000ff) ^
 1054                     rcon[i];
 1055             rk[ 9] = rk[ 1] ^ rk[ 8];
 1056             rk[10] = rk[ 2] ^ rk[ 9];
 1057             rk[11] = rk[ 3] ^ rk[10];
 1058             if(++i == 7) {
 1059                 return 14;
 1060             }
 1061             temp = rk[11];
 1062             rk[12] = rk[ 4] ^
 1063                     (Te4[b3(temp)] & 0xff000000) ^
 1064                     (Te4[b2(temp)] & 0x00ff0000) ^
 1065                     (Te4[b1(temp)] & 0x0000ff00) ^
 1066                     (Te4[b0(temp)] & 0x000000ff);
 1067             rk[13] = rk[ 5] ^ rk[12];
 1068             rk[14] = rk[ 6] ^ rk[13];
 1069             rk[15] = rk[ 7] ^ rk[14];
 1070             rk += 8;
 1071         }
 1072     }
 1073 
 1074     return 0;
 1075 }
 1076 
 1077 
 1078 #define INVMIXCOLRK(n) rk[n] =  Td0[b0(Te4[b3(rk[n])])] ^ Td1[b0(Te4[b2(rk[n])])] ^ Td2[b0(Te4[b1(rk[n])])] ^ Td3[b0(Te4[b0(rk[n])])]
 1079 
 1080 
 1081 // expand the cipher key into the decryption key schedule and
 1082 // return the number of rounds for the given cipher key size
 1083 static int aes_internal_key_setup_dec (uint32_t rk[/*4*(Nr + 1)*/], const uint8_t cipherKey[], int keyBits) {
 1084 
 1085     int Nr, i, j;
 1086     uint32_t temp;
 1087 
 1088     // expand the cipher key
 1089     Nr = aes_internal_key_setup_enc(rk, cipherKey, keyBits);
 1090     // invert the order of the round keys
 1091     for(i = 0, j = 4*Nr; i < j; i += 4, j -= 4) {
 1092         temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
 1093         temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
 1094         temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
 1095         temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
 1096     }
 1097 
 1098     // apply the inverse MixColumn transform to all round keys but the first and the last
 1099     for(i = 1; i < Nr; i++) {
 1100         rk += 4;
 1101         INVMIXCOLRK(0);
 1102         INVMIXCOLRK(1);
 1103         INVMIXCOLRK(2);
 1104         INVMIXCOLRK(3);
 1105     }
 1106 
 1107     return Nr;
 1108 }
 1109 
 1110 
 1111 #define AES_ENC_ROUND(DST, SRC, round) \
 1112     DST##0 = Te0[b3(SRC##0)] ^ Te1[b2(SRC##1)] ^ Te2[b1(SRC##2)] ^ Te3[b0(SRC##3)] ^ rk[4 * round + 0]; \
 1113     DST##1 = Te0[b3(SRC##1)] ^ Te1[b2(SRC##2)] ^ Te2[b1(SRC##3)] ^ Te3[b0(SRC##0)] ^ rk[4 * round + 1]; \
 1114     DST##2 = Te0[b3(SRC##2)] ^ Te1[b2(SRC##3)] ^ Te2[b1(SRC##0)] ^ Te3[b0(SRC##1)] ^ rk[4 * round + 2]; \
 1115     DST##3 = Te0[b3(SRC##3)] ^ Te1[b2(SRC##0)] ^ Te2[b1(SRC##1)] ^ Te3[b0(SRC##2)] ^ rk[4 * round + 3];
 1116 
 1117 
 1118 static void aes_internal_encrypt (const uint32_t rk[/*4*(Nr + 1)*/], int Nr, const uint8_t pt[16], uint8_t ct[16]) {
 1119 
 1120     uint32_t s0, s1, s2, s3, t0, t1, t2, t3;
 1121 
 1122     // map byte array block to cipher state and add initial round key
 1123     s0 = GETU32(pt     ) ^ rk[0];
 1124     s1 = GETU32(pt +  4) ^ rk[1];
 1125     s2 = GETU32(pt +  8) ^ rk[2];
 1126     s3 = GETU32(pt + 12) ^ rk[3];
 1127 
 1128     AES_ENC_ROUND(t, s, 1);
 1129     AES_ENC_ROUND(s, t, 2);
 1130     AES_ENC_ROUND(t, s, 3);
 1131     AES_ENC_ROUND(s, t, 4);
 1132     AES_ENC_ROUND(t, s, 5);
 1133     AES_ENC_ROUND(s, t, 6);
 1134     AES_ENC_ROUND(t, s, 7);
 1135     AES_ENC_ROUND(s, t, 8);
 1136     AES_ENC_ROUND(t, s, 9);
 1137 
 1138     if(Nr > 10) {
 1139         AES_ENC_ROUND(s, t, 10);
 1140         AES_ENC_ROUND(t, s, 11);
 1141         if(Nr > 12) {
 1142             AES_ENC_ROUND(s, t, 12);
 1143             AES_ENC_ROUND(t, s, 13);
 1144         }
 1145     }
 1146 
 1147     rk += Nr << 2;
 1148     // apply last round and map cipher state to byte array block
 1149     s0 = m3(Te4[b3(t0)]) ^ m2(Te4[b2(t1)]) ^ m1(Te4[b1(t2)]) ^ m0(Te4[b0(t3)]) ^ rk[0];
 1150     PUTU32(ct     , s0);
 1151     s1 = m3(Te4[b3(t1)]) ^ m2(Te4[b2(t2)]) ^ m1(Te4[b1(t3)]) ^ m0(Te4[b0(t0)]) ^ rk[1];
 1152     PUTU32(ct +  4, s1);
 1153     s2 = m3(Te4[b3(t2)]) ^ m2(Te4[b2(t3)]) ^ m1(Te4[b1(t0)]) ^ m0(Te4[b0(t1)]) ^ rk[2];
 1154     PUTU32(ct +  8, s2);
 1155     s3 = m3(Te4[b3(t3)]) ^ m2(Te4[b2(t0)]) ^ m1(Te4[b1(t1)]) ^ m0(Te4[b0(t2)]) ^ rk[3];
 1156     PUTU32(ct + 12, s3);
 1157 }
 1158 
 1159 
 1160 #define AES_DEC_ROUND(DST, SRC, round) \
 1161     DST##0 = Td0[b3(SRC##0)] ^ Td1[b2(SRC##3)] ^ Td2[b1(SRC##2)] ^ Td3[b0(SRC##1)] ^ rk[4 * round + 0]; \
 1162     DST##1 = Td0[b3(SRC##1)] ^ Td1[b2(SRC##0)] ^ Td2[b1(SRC##3)] ^ Td3[b0(SRC##2)] ^ rk[4 * round + 1]; \
 1163     DST##2 = Td0[b3(SRC##2)] ^ Td1[b2(SRC##1)] ^ Td2[b1(SRC##0)] ^ Td3[b0(SRC##3)] ^ rk[4 * round + 2]; \
 1164     DST##3 = Td0[b3(SRC##3)] ^ Td1[b2(SRC##2)] ^ Td2[b1(SRC##1)] ^ Td3[b0(SRC##0)] ^ rk[4 * round + 3];
 1165 
 1166 
 1167 static void aes_internal_decrypt (const uint32_t rk[/*4*(Nr + 1)*/], int Nr, const uint8_t ct[16], uint8_t pt[16]) {
 1168 
 1169     uint32_t s0, s1, s2, s3, t0, t1, t2, t3;
 1170 
 1171     // map byte array block to cipher state and add initial round key
 1172     s0 = GETU32(ct     ) ^ rk[0];
 1173     s1 = GETU32(ct +  4) ^ rk[1];
 1174     s2 = GETU32(ct +  8) ^ rk[2];
 1175     s3 = GETU32(ct + 12) ^ rk[3];
 1176 
 1177     AES_DEC_ROUND(t, s, 1);
 1178     AES_DEC_ROUND(s, t, 2);
 1179     AES_DEC_ROUND(t, s, 3);
 1180     AES_DEC_ROUND(s, t, 4);
 1181     AES_DEC_ROUND(t, s, 5);
 1182     AES_DEC_ROUND(s, t, 6);
 1183     AES_DEC_ROUND(t, s, 7);
 1184     AES_DEC_ROUND(s, t, 8);
 1185     AES_DEC_ROUND(t, s, 9);
 1186 
 1187     if(Nr > 10) {
 1188         AES_DEC_ROUND(s, t, 10);
 1189         AES_DEC_ROUND(t, s, 11);
 1190         if(Nr > 12) {
 1191             AES_DEC_ROUND(s, t, 12);
 1192             AES_DEC_ROUND(t, s, 13);
 1193         }
 1194     }
 1195 
 1196     rk += Nr << 2;
 1197     // apply last round and map cipher state to byte array block
 1198     s0 = m3(Td4[b3(t0)]) ^ m2(Td4[b2(t3)]) ^ m1(Td4[b1(t2)]) ^ m0(Td4[b0(t1)]) ^ rk[0];
 1199     PUTU32(pt     , s0);
 1200     s1 = m3(Td4[b3(t1)]) ^ m2(Td4[b2(t0)]) ^ m1(Td4[b1(t3)]) ^ m0(Td4[b0(t2)]) ^ rk[1];
 1201     PUTU32(pt +  4, s1);
 1202     s2 = m3(Td4[b3(t2)]) ^ m2(Td4[b2(t1)]) ^ m1(Td4[b1(t0)]) ^ m0(Td4[b0(t3)]) ^ rk[2];
 1203     PUTU32(pt +  8, s2);
 1204     s3 = m3(Td4[b3(t3)]) ^ m2(Td4[b2(t2)]) ^ m1(Td4[b1(t1)]) ^ m0(Td4[b0(t0)]) ^ rk[3];
 1205     PUTU32(pt + 12, s3);
 1206 }
 1207 
 1208 
 1209 // public API
 1210 
 1211 
 1212 int aes_ecb_decrypt (unsigned char *out, const unsigned char *in, aes_context_t *ctx) {
 1213 
 1214     aes_internal_decrypt(ctx->dec_rk, ctx->Nr, in, out);
 1215 
 1216     return AES_BLOCK_SIZE;
 1217 }
 1218 
 1219 
 1220 // not used
 1221 int aes_ecb_encrypt (unsigned char *out, const unsigned char *in, aes_context_t *ctx) {
 1222 
 1223     aes_internal_encrypt(ctx->enc_rk, ctx->Nr, in, out);
 1224 
 1225     return AES_BLOCK_SIZE;
 1226 }
 1227 
 1228 
 1229 #define fix_xor(target, source) *(uint32_t*)&(target)[0] = *(uint32_t*)&(target)[0] ^ *(uint32_t*)&(source)[0]; *(uint32_t*)&(target)[4] = *(uint32_t*)&(target)[4] ^ *(uint32_t*)&(source)[4]; \
 1230                                 *(uint32_t*)&(target)[8] = *(uint32_t*)&(target)[8] ^ *(uint32_t*)&(source)[8]; *(uint32_t*)&(target)[12] = *(uint32_t*)&(target)[12] ^ *(uint32_t*)&(source)[12];
 1231 
 1232 
 1233 int aes_cbc_encrypt (unsigned char *out, const unsigned char *in, size_t in_len,
 1234                      const unsigned char *iv, aes_context_t *ctx) {
 1235 
 1236     uint8_t tmp[AES_BLOCK_SIZE];
 1237     size_t i;
 1238     size_t n;
 1239 
 1240     memcpy(tmp, iv, AES_BLOCK_SIZE);
 1241 
 1242     n = in_len / AES_BLOCK_SIZE;
 1243     for(i=0; i < n; i++) {
 1244         fix_xor(tmp, &in[i * AES_BLOCK_SIZE]);
 1245         aes_internal_encrypt(ctx->enc_rk, ctx->Nr, tmp, tmp);
 1246         memcpy(&out[i * AES_BLOCK_SIZE], tmp, AES_BLOCK_SIZE);
 1247     }
 1248 
 1249     return n * AES_BLOCK_SIZE;
 1250 }
 1251 
 1252 
 1253 int aes_cbc_decrypt (unsigned char *out, const unsigned char *in, size_t in_len,
 1254                      const unsigned char *iv, aes_context_t *ctx) {
 1255 
 1256     uint8_t tmp[AES_BLOCK_SIZE];
 1257     uint8_t old[AES_BLOCK_SIZE];
 1258     size_t i;
 1259     size_t n;
 1260 
 1261     memcpy(tmp, iv, AES_BLOCK_SIZE);
 1262 
 1263     n = in_len / AES_BLOCK_SIZE;
 1264     for(i=0; i < n; i++) {
 1265         memcpy(old, &in[i * AES_BLOCK_SIZE], AES_BLOCK_SIZE);
 1266         aes_internal_decrypt(ctx->dec_rk, ctx->Nr, &in[i * AES_BLOCK_SIZE], &out[i * AES_BLOCK_SIZE]);
 1267         fix_xor(&out[i * AES_BLOCK_SIZE], tmp);
 1268         memcpy(tmp, old, AES_BLOCK_SIZE);
 1269     }
 1270 
 1271     return n * AES_BLOCK_SIZE;
 1272 }
 1273 
 1274 
 1275 int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) {
 1276 
 1277     // allocate context...
 1278     *ctx = (aes_context_t*) calloc(1, sizeof(aes_context_t));
 1279     if(!(*ctx))
 1280         return -1;
 1281     // ...and fill her up:
 1282 
 1283     // initialize data structures
 1284 
 1285     // check key size and make key size (given in bytes) dependant settings
 1286     switch(key_size) {
 1287         case AES128_KEY_BYTES:    // 128 bit key size
 1288             break;
 1289         case AES192_KEY_BYTES:    // 192 bit key size
 1290             break;
 1291         case AES256_KEY_BYTES:    // 256 bit key size
 1292             break;
 1293         default:
 1294             traceEvent(TRACE_ERROR, "aes_init invalid key size %u\n", key_size);
 1295             return -1;
 1296     }
 1297 
 1298     // key materiel handling
 1299     (*ctx)->Nr = aes_internal_key_setup_enc((*ctx)->enc_rk/*[4*(Nr + 1)]*/, key, 8 * key_size);
 1300                  aes_internal_key_setup_dec((*ctx)->dec_rk/*[4*(Nr + 1)]*/, key, 8 * key_size);
 1301     return 0;
 1302 }
 1303 
 1304 
 1305 #endif // openSSL 1.1, AES-NI, plain C ----------------------------------------------------------------------------
 1306 
 1307 
 1308 int aes_deinit (aes_context_t *ctx) {
 1309 
 1310     if(ctx) free(ctx);
 1311 
 1312     return 0;
 1313 }