"Fossies" - the Fresh Open Source Software Archive

Member "nss-3.55/nss/lib/freebl/aes-armv8.c" (24 Jul 2020, 35181 Bytes) of package /linux/misc/nss-3.55.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "aes-armv8.c", see the Fossies "Dox" file reference documentation.

    1 /* This Source Code Form is subject to the terms of the Mozilla Public
    2  * License, v. 2.0. If a copy of the MPL was not distributed with this
    3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
    4 
    5 #include "secerr.h"
    6 #include "rijndael.h"
    7 
    8 #if ((defined(__clang__) ||                                         \
    9       (defined(__GNUC__) && defined(__GNUC_MINOR__) &&              \
   10        (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 8)))) && \
   11      defined(IS_LITTLE_ENDIAN))
   12 
   13 #ifndef __ARM_FEATURE_CRYPTO
   14 #error "Compiler option is invalid"
   15 #endif
   16 
   17 #include <arm_neon.h>
   18 
   19 SECStatus
   20 arm_aes_encrypt_ecb_128(AESContext *cx, unsigned char *output,
   21                         unsigned int *outputLen,
   22                         unsigned int maxOutputLen,
   23                         const unsigned char *input,
   24                         unsigned int inputLen,
   25                         unsigned int blocksize)
   26 {
   27 #if !defined(HAVE_UNALIGNED_ACCESS)
   28     pre_align unsigned char buf[16] post_align;
   29 #endif
   30     uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
   31     uint8x16_t key11;
   32     const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
   33 
   34     if (!inputLen) {
   35         return SECSuccess;
   36     }
   37 
   38     key1 = vld1q_u8(key);
   39     key2 = vld1q_u8(key + 16);
   40     key3 = vld1q_u8(key + 32);
   41     key4 = vld1q_u8(key + 48);
   42     key5 = vld1q_u8(key + 64);
   43     key6 = vld1q_u8(key + 80);
   44     key7 = vld1q_u8(key + 96);
   45     key8 = vld1q_u8(key + 112);
   46     key9 = vld1q_u8(key + 128);
   47     key10 = vld1q_u8(key + 144);
   48     key11 = vld1q_u8(key + 160);
   49 
   50     while (inputLen > 0) {
   51         uint8x16_t state;
   52 #if defined(HAVE_UNALIGNED_ACCESS)
   53         state = vld1q_u8(input);
   54 #else
   55         if ((uintptr_t)input & 0x7) {
   56             memcpy(buf, input, 16);
   57             state = vld1q_u8(__builtin_assume_aligned(buf, 16));
   58         } else {
   59             state = vld1q_u8(__builtin_assume_aligned(input, 8));
   60         }
   61 #endif
   62         input += 16;
   63         inputLen -= 16;
   64 
   65         /* Rounds */
   66         state = vaeseq_u8(state, key1);
   67         state = vaesmcq_u8(state);
   68         state = vaeseq_u8(state, key2);
   69         state = vaesmcq_u8(state);
   70         state = vaeseq_u8(state, key3);
   71         state = vaesmcq_u8(state);
   72         state = vaeseq_u8(state, key4);
   73         state = vaesmcq_u8(state);
   74         state = vaeseq_u8(state, key5);
   75         state = vaesmcq_u8(state);
   76         state = vaeseq_u8(state, key6);
   77         state = vaesmcq_u8(state);
   78         state = vaeseq_u8(state, key7);
   79         state = vaesmcq_u8(state);
   80         state = vaeseq_u8(state, key8);
   81         state = vaesmcq_u8(state);
   82         state = vaeseq_u8(state, key9);
   83         state = vaesmcq_u8(state);
   84         state = vaeseq_u8(state, key10);
   85         /* AddRoundKey */
   86         state = veorq_u8(state, key11);
   87 
   88 #if defined(HAVE_UNALIGNED_ACCESS)
   89         vst1q_u8(output, state);
   90 #else
   91         if ((uintptr_t)output & 0x7) {
   92             vst1q_u8(__builtin_assume_aligned(buf, 16), state);
   93             memcpy(output, buf, 16);
   94         } else {
   95             vst1q_u8(__builtin_assume_aligned(output, 8), state);
   96         }
   97 #endif
   98         output += 16;
   99     }
  100 
  101     return SECSuccess;
  102 }
  103 
  104 SECStatus
  105 arm_aes_decrypt_ecb_128(AESContext *cx, unsigned char *output,
  106                         unsigned int *outputLen,
  107                         unsigned int maxOutputLen,
  108                         const unsigned char *input,
  109                         unsigned int inputLen,
  110                         unsigned int blocksize)
  111 {
  112 #if !defined(HAVE_UNALIGNED_ACCESS)
  113     pre_align unsigned char buf[16] post_align;
  114 #endif
  115     uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
  116     uint8x16_t key11;
  117     const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
  118 
  119     if (inputLen == 0) {
  120         return SECSuccess;
  121     }
  122 
  123     key1 = vld1q_u8(key);
  124     key2 = vld1q_u8(key + 16);
  125     key3 = vld1q_u8(key + 32);
  126     key4 = vld1q_u8(key + 48);
  127     key5 = vld1q_u8(key + 64);
  128     key6 = vld1q_u8(key + 80);
  129     key7 = vld1q_u8(key + 96);
  130     key8 = vld1q_u8(key + 112);
  131     key9 = vld1q_u8(key + 128);
  132     key10 = vld1q_u8(key + 144);
  133     key11 = vld1q_u8(key + 160);
  134 
  135     while (inputLen > 0) {
  136         uint8x16_t state;
  137 #if defined(HAVE_UNALIGNED_ACCESS)
  138         state = vld1q_u8(input);
  139 #else
  140         if ((uintptr_t)input & 0x7) {
  141             memcpy(buf, input, 16);
  142             state = vld1q_u8(__builtin_assume_aligned(buf, 16));
  143         } else {
  144             state = vld1q_u8(__builtin_assume_aligned(input, 8));
  145         }
  146 #endif
  147         input += 16;
  148         inputLen -= 16;
  149 
  150         /* Rounds */
  151         state = vaesdq_u8(state, key11);
  152         state = vaesimcq_u8(state);
  153         state = vaesdq_u8(state, key10);
  154         state = vaesimcq_u8(state);
  155         state = vaesdq_u8(state, key9);
  156         state = vaesimcq_u8(state);
  157         state = vaesdq_u8(state, key8);
  158         state = vaesimcq_u8(state);
  159         state = vaesdq_u8(state, key7);
  160         state = vaesimcq_u8(state);
  161         state = vaesdq_u8(state, key6);
  162         state = vaesimcq_u8(state);
  163         state = vaesdq_u8(state, key5);
  164         state = vaesimcq_u8(state);
  165         state = vaesdq_u8(state, key4);
  166         state = vaesimcq_u8(state);
  167         state = vaesdq_u8(state, key3);
  168         state = vaesimcq_u8(state);
  169         state = vaesdq_u8(state, key2);
  170         /* AddRoundKey */
  171         state = veorq_u8(state, key1);
  172 
  173 #if defined(HAVE_UNALIGNED_ACCESS)
  174         vst1q_u8(output, state);
  175 #else
  176         if ((uintptr_t)output & 0x7) {
  177             vst1q_u8(__builtin_assume_aligned(buf, 16), state);
  178             memcpy(output, buf, 16);
  179         } else {
  180             vst1q_u8(__builtin_assume_aligned(output, 8), state);
  181         }
  182 #endif
  183         output += 16;
  184     }
  185 
  186     return SECSuccess;
  187 }
  188 
  189 SECStatus
  190 arm_aes_encrypt_cbc_128(AESContext *cx, unsigned char *output,
  191                         unsigned int *outputLen,
  192                         unsigned int maxOutputLen,
  193                         const unsigned char *input,
  194                         unsigned int inputLen,
  195                         unsigned int blocksize)
  196 {
  197 #if !defined(HAVE_UNALIGNED_ACCESS)
  198     pre_align unsigned char buf[16] post_align;
  199 #endif
  200     uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
  201     uint8x16_t key11;
  202     uint8x16_t iv;
  203     const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
  204 
  205     if (!inputLen) {
  206         return SECSuccess;
  207     }
  208 
  209     /* iv */
  210     iv = vld1q_u8(cx->iv);
  211 
  212     /* expanedKey */
  213     key1 = vld1q_u8(key);
  214     key2 = vld1q_u8(key + 16);
  215     key3 = vld1q_u8(key + 32);
  216     key4 = vld1q_u8(key + 48);
  217     key5 = vld1q_u8(key + 64);
  218     key6 = vld1q_u8(key + 80);
  219     key7 = vld1q_u8(key + 96);
  220     key8 = vld1q_u8(key + 112);
  221     key9 = vld1q_u8(key + 128);
  222     key10 = vld1q_u8(key + 144);
  223     key11 = vld1q_u8(key + 160);
  224 
  225     while (inputLen > 0) {
  226         uint8x16_t state;
  227 #if defined(HAVE_UNALIGNED_ACCESS)
  228         state = vld1q_u8(input);
  229 #else
  230         if ((uintptr_t)input & 0x7) {
  231             memcpy(buf, input, 16);
  232             state = vld1q_u8(__builtin_assume_aligned(buf, 16));
  233         } else {
  234             state = vld1q_u8(__builtin_assume_aligned(input, 8));
  235         }
  236 #endif
  237         input += 16;
  238         inputLen -= 16;
  239 
  240         state = veorq_u8(state, iv);
  241 
  242         /* Rounds */
  243         state = vaeseq_u8(state, key1);
  244         state = vaesmcq_u8(state);
  245         state = vaeseq_u8(state, key2);
  246         state = vaesmcq_u8(state);
  247         state = vaeseq_u8(state, key3);
  248         state = vaesmcq_u8(state);
  249         state = vaeseq_u8(state, key4);
  250         state = vaesmcq_u8(state);
  251         state = vaeseq_u8(state, key5);
  252         state = vaesmcq_u8(state);
  253         state = vaeseq_u8(state, key6);
  254         state = vaesmcq_u8(state);
  255         state = vaeseq_u8(state, key7);
  256         state = vaesmcq_u8(state);
  257         state = vaeseq_u8(state, key8);
  258         state = vaesmcq_u8(state);
  259         state = vaeseq_u8(state, key9);
  260         state = vaesmcq_u8(state);
  261         state = vaeseq_u8(state, key10);
  262         /* AddRoundKey */
  263         state = veorq_u8(state, key11);
  264 
  265 #if defined(HAVE_UNALIGNED_ACCESS)
  266         vst1q_u8(output, state);
  267 #else
  268         if ((uintptr_t)output & 0x7) {
  269             vst1q_u8(__builtin_assume_aligned(buf, 16), state);
  270             memcpy(output, buf, 16);
  271         } else {
  272             vst1q_u8(__builtin_assume_aligned(output, 8), state);
  273         }
  274 #endif
  275         output += 16;
  276         iv = state;
  277     }
  278     vst1q_u8(cx->iv, iv);
  279 
  280     return SECSuccess;
  281 }
  282 
  283 SECStatus
  284 arm_aes_decrypt_cbc_128(AESContext *cx, unsigned char *output,
  285                         unsigned int *outputLen,
  286                         unsigned int maxOutputLen,
  287                         const unsigned char *input,
  288                         unsigned int inputLen,
  289                         unsigned int blocksize)
  290 {
  291 #if !defined(HAVE_UNALIGNED_ACCESS)
  292     pre_align unsigned char buf[16] post_align;
  293 #endif
  294     uint8x16_t iv;
  295     uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
  296     uint8x16_t key11;
  297     const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
  298 
  299     if (!inputLen) {
  300         return SECSuccess;
  301     }
  302 
  303     /* iv */
  304     iv = vld1q_u8(cx->iv);
  305 
  306     /* expanedKey */
  307     key1 = vld1q_u8(key);
  308     key2 = vld1q_u8(key + 16);
  309     key3 = vld1q_u8(key + 32);
  310     key4 = vld1q_u8(key + 48);
  311     key5 = vld1q_u8(key + 64);
  312     key6 = vld1q_u8(key + 80);
  313     key7 = vld1q_u8(key + 96);
  314     key8 = vld1q_u8(key + 112);
  315     key9 = vld1q_u8(key + 128);
  316     key10 = vld1q_u8(key + 144);
  317     key11 = vld1q_u8(key + 160);
  318 
  319     while (inputLen > 0) {
  320         uint8x16_t state, old_state;
  321 #if defined(HAVE_UNALIGNED_ACCESS)
  322         state = vld1q_u8(input);
  323 #else
  324         if ((uintptr_t)input & 0x7) {
  325             memcpy(buf, input, 16);
  326             state = vld1q_u8(__builtin_assume_aligned(buf, 16));
  327         } else {
  328             state = vld1q_u8(__builtin_assume_aligned(input, 8));
  329         }
  330 #endif
  331         old_state = state;
  332         input += 16;
  333         inputLen -= 16;
  334 
  335         /* Rounds */
  336         state = vaesdq_u8(state, key11);
  337         state = vaesimcq_u8(state);
  338         state = vaesdq_u8(state, key10);
  339         state = vaesimcq_u8(state);
  340         state = vaesdq_u8(state, key9);
  341         state = vaesimcq_u8(state);
  342         state = vaesdq_u8(state, key8);
  343         state = vaesimcq_u8(state);
  344         state = vaesdq_u8(state, key7);
  345         state = vaesimcq_u8(state);
  346         state = vaesdq_u8(state, key6);
  347         state = vaesimcq_u8(state);
  348         state = vaesdq_u8(state, key5);
  349         state = vaesimcq_u8(state);
  350         state = vaesdq_u8(state, key4);
  351         state = vaesimcq_u8(state);
  352         state = vaesdq_u8(state, key3);
  353         state = vaesimcq_u8(state);
  354         state = vaesdq_u8(state, key2);
  355         /* AddRoundKey */
  356         state = veorq_u8(state, key1);
  357 
  358         state = veorq_u8(state, iv);
  359 
  360 #if defined(HAVE_UNALIGNED_ACCESS)
  361         vst1q_u8(output, state);
  362 #else
  363         if ((uintptr_t)output & 0x7) {
  364             vst1q_u8(__builtin_assume_aligned(buf, 16), state);
  365             memcpy(output, buf, 16);
  366         } else {
  367             vst1q_u8(__builtin_assume_aligned(output, 8), state);
  368         }
  369 #endif
  370         output += 16;
  371 
  372         iv = old_state;
  373     }
  374     vst1q_u8(cx->iv, iv);
  375 
  376     return SECSuccess;
  377 }
  378 
  379 SECStatus
  380 arm_aes_encrypt_ecb_192(AESContext *cx, unsigned char *output,
  381                         unsigned int *outputLen,
  382                         unsigned int maxOutputLen,
  383                         const unsigned char *input,
  384                         unsigned int inputLen,
  385                         unsigned int blocksize)
  386 {
  387 #if !defined(HAVE_UNALIGNED_ACCESS)
  388     pre_align unsigned char buf[16] post_align;
  389 #endif
  390     uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
  391     uint8x16_t key11, key12, key13;
  392     PRUint8 *key = (PRUint8 *)cx->k.expandedKey;
  393 
  394     if (!inputLen) {
  395         return SECSuccess;
  396     }
  397 
  398     key1 = vld1q_u8(key);
  399     key2 = vld1q_u8(key + 16);
  400     key3 = vld1q_u8(key + 32);
  401     key4 = vld1q_u8(key + 48);
  402     key5 = vld1q_u8(key + 64);
  403     key6 = vld1q_u8(key + 80);
  404     key7 = vld1q_u8(key + 96);
  405     key8 = vld1q_u8(key + 112);
  406     key9 = vld1q_u8(key + 128);
  407     key10 = vld1q_u8(key + 144);
  408     key11 = vld1q_u8(key + 160);
  409     key12 = vld1q_u8(key + 176);
  410     key13 = vld1q_u8(key + 192);
  411 
  412     while (inputLen > 0) {
  413         uint8x16_t state;
  414 #if defined(HAVE_UNALIGNED_ACCESS)
  415         state = vld1q_u8(input);
  416 #else
  417         if ((uintptr_t)input & 0x7) {
  418             memcpy(buf, input, 16);
  419             state = vld1q_u8(__builtin_assume_aligned(buf, 16));
  420         } else {
  421             state = vld1q_u8(__builtin_assume_aligned(input, 8));
  422         }
  423 #endif
  424         input += 16;
  425         inputLen -= 16;
  426 
  427         /* Rounds */
  428         state = vaeseq_u8(state, key1);
  429         state = vaesmcq_u8(state);
  430         state = vaeseq_u8(state, key2);
  431         state = vaesmcq_u8(state);
  432         state = vaeseq_u8(state, key3);
  433         state = vaesmcq_u8(state);
  434         state = vaeseq_u8(state, key4);
  435         state = vaesmcq_u8(state);
  436         state = vaeseq_u8(state, key5);
  437         state = vaesmcq_u8(state);
  438         state = vaeseq_u8(state, key6);
  439         state = vaesmcq_u8(state);
  440         state = vaeseq_u8(state, key7);
  441         state = vaesmcq_u8(state);
  442         state = vaeseq_u8(state, key8);
  443         state = vaesmcq_u8(state);
  444         state = vaeseq_u8(state, key9);
  445         state = vaesmcq_u8(state);
  446         state = vaeseq_u8(state, key10);
  447         state = vaesmcq_u8(state);
  448         state = vaeseq_u8(state, key11);
  449         state = vaesmcq_u8(state);
  450         state = vaeseq_u8(state, key12);
  451         /* AddRoundKey */
  452         state = veorq_u8(state, key13);
  453 
  454 #if defined(HAVE_UNALIGNED_ACCESS)
  455         vst1q_u8(output, state);
  456 #else
  457         if ((uintptr_t)output & 0x7) {
  458             vst1q_u8(__builtin_assume_aligned(buf, 16), state);
  459             memcpy(output, buf, 16);
  460         } else {
  461             vst1q_u8(__builtin_assume_aligned(output, 8), state);
  462         }
  463 #endif
  464         output += 16;
  465     }
  466 
  467     return SECSuccess;
  468 }
  469 
  470 SECStatus
  471 arm_aes_decrypt_ecb_192(AESContext *cx, unsigned char *output,
  472                         unsigned int *outputLen,
  473                         unsigned int maxOutputLen,
  474                         const unsigned char *input,
  475                         unsigned int inputLen,
  476                         unsigned int blocksize)
  477 {
  478 #if !defined(HAVE_UNALIGNED_ACCESS)
  479     pre_align unsigned char buf[16] post_align;
  480 #endif
  481     uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
  482     uint8x16_t key11, key12, key13;
  483     const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
  484 
  485     if (!inputLen) {
  486         return SECSuccess;
  487     }
  488 
  489     key1 = vld1q_u8(key);
  490     key2 = vld1q_u8(key + 16);
  491     key3 = vld1q_u8(key + 32);
  492     key4 = vld1q_u8(key + 48);
  493     key5 = vld1q_u8(key + 64);
  494     key6 = vld1q_u8(key + 80);
  495     key7 = vld1q_u8(key + 96);
  496     key8 = vld1q_u8(key + 112);
  497     key9 = vld1q_u8(key + 128);
  498     key10 = vld1q_u8(key + 144);
  499     key11 = vld1q_u8(key + 160);
  500     key12 = vld1q_u8(key + 176);
  501     key13 = vld1q_u8(key + 192);
  502 
  503     while (inputLen > 0) {
  504         uint8x16_t state;
  505 #if defined(HAVE_UNALIGNED_ACCESS)
  506         state = vld1q_u8(input);
  507 #else
  508         if ((uintptr_t)input & 0x7) {
  509             memcpy(buf, input, 16);
  510             state = vld1q_u8(__builtin_assume_aligned(buf, 16));
  511         } else {
  512             state = vld1q_u8(__builtin_assume_aligned(input, 8));
  513         }
  514 #endif
  515         input += 16;
  516         inputLen -= 16;
  517 
  518         /* Rounds */
  519         state = vaesdq_u8(state, key13);
  520         state = vaesimcq_u8(state);
  521         state = vaesdq_u8(state, key12);
  522         state = vaesimcq_u8(state);
  523         state = vaesdq_u8(state, key11);
  524         state = vaesimcq_u8(state);
  525         state = vaesdq_u8(state, key10);
  526         state = vaesimcq_u8(state);
  527         state = vaesdq_u8(state, key9);
  528         state = vaesimcq_u8(state);
  529         state = vaesdq_u8(state, key8);
  530         state = vaesimcq_u8(state);
  531         state = vaesdq_u8(state, key7);
  532         state = vaesimcq_u8(state);
  533         state = vaesdq_u8(state, key6);
  534         state = vaesimcq_u8(state);
  535         state = vaesdq_u8(state, key5);
  536         state = vaesimcq_u8(state);
  537         state = vaesdq_u8(state, key4);
  538         state = vaesimcq_u8(state);
  539         state = vaesdq_u8(state, key3);
  540         state = vaesimcq_u8(state);
  541         state = vaesdq_u8(state, key2);
  542         /* AddRoundKey */
  543         state = veorq_u8(state, key1);
  544 
  545 #if defined(HAVE_UNALIGNED_ACCESS)
  546         vst1q_u8(output, state);
  547 #else
  548         if ((uintptr_t)output & 0x7) {
  549             vst1q_u8(__builtin_assume_aligned(buf, 16), state);
  550             memcpy(output, buf, 16);
  551         } else {
  552             vst1q_u8(__builtin_assume_aligned(output, 8), state);
  553         }
  554 #endif
  555         output += 16;
  556     }
  557 
  558     return SECSuccess;
  559 }
  560 
  561 SECStatus
  562 arm_aes_encrypt_cbc_192(AESContext *cx, unsigned char *output,
  563                         unsigned int *outputLen,
  564                         unsigned int maxOutputLen,
  565                         const unsigned char *input,
  566                         unsigned int inputLen,
  567                         unsigned int blocksize)
  568 {
  569 #if !defined(HAVE_UNALIGNED_ACCESS)
  570     pre_align unsigned char buf[16] post_align;
  571 #endif
  572     uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
  573     uint8x16_t key11, key12, key13;
  574     uint8x16_t iv;
  575     PRUint8 *key = (PRUint8 *)cx->k.expandedKey;
  576 
  577     if (!inputLen) {
  578         return SECSuccess;
  579     }
  580 
  581     /* iv */
  582     iv = vld1q_u8(cx->iv);
  583 
  584     /* expanedKey */
  585     key1 = vld1q_u8(key);
  586     key2 = vld1q_u8(key + 16);
  587     key3 = vld1q_u8(key + 32);
  588     key4 = vld1q_u8(key + 48);
  589     key5 = vld1q_u8(key + 64);
  590     key6 = vld1q_u8(key + 80);
  591     key7 = vld1q_u8(key + 96);
  592     key8 = vld1q_u8(key + 112);
  593     key9 = vld1q_u8(key + 128);
  594     key10 = vld1q_u8(key + 144);
  595     key11 = vld1q_u8(key + 160);
  596     key12 = vld1q_u8(key + 176);
  597     key13 = vld1q_u8(key + 192);
  598 
  599     while (inputLen > 0) {
  600         uint8x16_t state;
  601 #if defined(HAVE_UNALIGNED_ACCESS)
  602         state = vld1q_u8(input);
  603 #else
  604         if ((uintptr_t)input & 0x7) {
  605             memcpy(buf, input, 16);
  606             state = vld1q_u8(__builtin_assume_aligned(buf, 16));
  607         } else {
  608             state = vld1q_u8(__builtin_assume_aligned(input, 8));
  609         }
  610 #endif
  611         input += 16;
  612         inputLen -= 16;
  613 
  614         state = veorq_u8(state, iv);
  615 
  616         /* Rounds */
  617         state = vaeseq_u8(state, key1);
  618         state = vaesmcq_u8(state);
  619         state = vaeseq_u8(state, key2);
  620         state = vaesmcq_u8(state);
  621         state = vaeseq_u8(state, key3);
  622         state = vaesmcq_u8(state);
  623         state = vaeseq_u8(state, key4);
  624         state = vaesmcq_u8(state);
  625         state = vaeseq_u8(state, key5);
  626         state = vaesmcq_u8(state);
  627         state = vaeseq_u8(state, key6);
  628         state = vaesmcq_u8(state);
  629         state = vaeseq_u8(state, key7);
  630         state = vaesmcq_u8(state);
  631         state = vaeseq_u8(state, key8);
  632         state = vaesmcq_u8(state);
  633         state = vaeseq_u8(state, key9);
  634         state = vaesmcq_u8(state);
  635         state = vaeseq_u8(state, key10);
  636         state = vaesmcq_u8(state);
  637         state = vaeseq_u8(state, key11);
  638         state = vaesmcq_u8(state);
  639         state = vaeseq_u8(state, key12);
  640         state = veorq_u8(state, key13);
  641 
  642 #if defined(HAVE_UNALIGNED_ACCESS)
  643         vst1q_u8(output, state);
  644 #else
  645         if ((uintptr_t)output & 0x7) {
  646             vst1q_u8(__builtin_assume_aligned(buf, 16), state);
  647             memcpy(output, buf, 16);
  648         } else {
  649             vst1q_u8(__builtin_assume_aligned(output, 8), state);
  650         }
  651 #endif
  652         output += 16;
  653         iv = state;
  654     }
  655     vst1q_u8(cx->iv, iv);
  656 
  657     return SECSuccess;
  658 }
  659 
  660 SECStatus
  661 arm_aes_decrypt_cbc_192(AESContext *cx, unsigned char *output,
  662                         unsigned int *outputLen,
  663                         unsigned int maxOutputLen,
  664                         const unsigned char *input,
  665                         unsigned int inputLen,
  666                         unsigned int blocksize)
  667 {
  668 #if !defined(HAVE_UNALIGNED_ACCESS)
  669     pre_align unsigned char buf[16] post_align;
  670 #endif
  671     uint8x16_t iv;
  672     uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
  673     uint8x16_t key11, key12, key13;
  674     const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
  675 
  676     if (!inputLen) {
  677         return SECSuccess;
  678     }
  679 
  680     /* iv */
  681     iv = vld1q_u8(cx->iv);
  682 
  683     /* expanedKey */
  684     key1 = vld1q_u8(key);
  685     key2 = vld1q_u8(key + 16);
  686     key3 = vld1q_u8(key + 32);
  687     key4 = vld1q_u8(key + 48);
  688     key5 = vld1q_u8(key + 64);
  689     key6 = vld1q_u8(key + 80);
  690     key7 = vld1q_u8(key + 96);
  691     key8 = vld1q_u8(key + 112);
  692     key9 = vld1q_u8(key + 128);
  693     key10 = vld1q_u8(key + 144);
  694     key11 = vld1q_u8(key + 160);
  695     key12 = vld1q_u8(key + 176);
  696     key13 = vld1q_u8(key + 192);
  697 
  698     while (inputLen > 0) {
  699         uint8x16_t state, old_state;
  700 #if defined(HAVE_UNALIGNED_ACCESS)
  701         state = vld1q_u8(input);
  702 #else
  703         if ((uintptr_t)input & 0x7) {
  704             memcpy(buf, input, 16);
  705             state = vld1q_u8(__builtin_assume_aligned(buf, 16));
  706         } else {
  707             state = vld1q_u8(__builtin_assume_aligned(input, 8));
  708         }
  709 #endif
  710         old_state = state;
  711         input += 16;
  712         inputLen -= 16;
  713 
  714         /* Rounds */
  715         state = vaesdq_u8(state, key13);
  716         state = vaesimcq_u8(state);
  717         state = vaesdq_u8(state, key12);
  718         state = vaesimcq_u8(state);
  719         state = vaesdq_u8(state, key11);
  720         state = vaesimcq_u8(state);
  721         state = vaesdq_u8(state, key10);
  722         state = vaesimcq_u8(state);
  723         state = vaesdq_u8(state, key9);
  724         state = vaesimcq_u8(state);
  725         state = vaesdq_u8(state, key8);
  726         state = vaesimcq_u8(state);
  727         state = vaesdq_u8(state, key7);
  728         state = vaesimcq_u8(state);
  729         state = vaesdq_u8(state, key6);
  730         state = vaesimcq_u8(state);
  731         state = vaesdq_u8(state, key5);
  732         state = vaesimcq_u8(state);
  733         state = vaesdq_u8(state, key4);
  734         state = vaesimcq_u8(state);
  735         state = vaesdq_u8(state, key3);
  736         state = vaesimcq_u8(state);
  737         state = vaesdq_u8(state, key2);
  738         /* AddRoundKey */
  739         state = veorq_u8(state, key1);
  740 
  741         state = veorq_u8(state, iv);
  742 
  743 #if defined(HAVE_UNALIGNED_ACCESS)
  744         vst1q_u8(output, state);
  745 #else
  746         if ((uintptr_t)output & 0x7) {
  747             vst1q_u8(__builtin_assume_aligned(buf, 16), state);
  748             memcpy(output, buf, 16);
  749         } else {
  750             vst1q_u8(__builtin_assume_aligned(output, 8), state);
  751         }
  752 #endif
  753         output += 16;
  754 
  755         iv = old_state;
  756     }
  757     vst1q_u8(cx->iv, iv);
  758 
  759     return SECSuccess;
  760 }
  761 
  762 SECStatus
  763 arm_aes_encrypt_ecb_256(AESContext *cx, unsigned char *output,
  764                         unsigned int *outputLen,
  765                         unsigned int maxOutputLen,
  766                         const unsigned char *input,
  767                         unsigned int inputLen,
  768                         unsigned int blocksize)
  769 {
  770 #if !defined(HAVE_UNALIGNED_ACCESS)
  771     pre_align unsigned char buf[16] post_align;
  772 #endif
  773     uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
  774     uint8x16_t key11, key12, key13, key14, key15;
  775     PRUint8 *key = (PRUint8 *)cx->k.expandedKey;
  776 
  777     if (inputLen == 0) {
  778         return SECSuccess;
  779     }
  780 
  781     key1 = vld1q_u8(key);
  782     key2 = vld1q_u8(key + 16);
  783     key3 = vld1q_u8(key + 32);
  784     key4 = vld1q_u8(key + 48);
  785     key5 = vld1q_u8(key + 64);
  786     key6 = vld1q_u8(key + 80);
  787     key7 = vld1q_u8(key + 96);
  788     key8 = vld1q_u8(key + 112);
  789     key9 = vld1q_u8(key + 128);
  790     key10 = vld1q_u8(key + 144);
  791     key11 = vld1q_u8(key + 160);
  792     key12 = vld1q_u8(key + 176);
  793     key13 = vld1q_u8(key + 192);
  794     key14 = vld1q_u8(key + 208);
  795     key15 = vld1q_u8(key + 224);
  796 
  797     while (inputLen > 0) {
  798         uint8x16_t state;
  799 #if defined(HAVE_UNALIGNED_ACCESS)
  800         state = vld1q_u8(input);
  801 #else
  802         if ((uintptr_t)input & 0x7) {
  803             memcpy(buf, input, 16);
  804             state = vld1q_u8(__builtin_assume_aligned(buf, 16));
  805         } else {
  806             state = vld1q_u8(__builtin_assume_aligned(input, 8));
  807         }
  808 #endif
  809         input += 16;
  810         inputLen -= 16;
  811 
  812         /* Rounds */
  813         state = vaeseq_u8(state, key1);
  814         state = vaesmcq_u8(state);
  815         state = vaeseq_u8(state, key2);
  816         state = vaesmcq_u8(state);
  817         state = vaeseq_u8(state, key3);
  818         state = vaesmcq_u8(state);
  819         state = vaeseq_u8(state, key4);
  820         state = vaesmcq_u8(state);
  821         state = vaeseq_u8(state, key5);
  822         state = vaesmcq_u8(state);
  823         state = vaeseq_u8(state, key6);
  824         state = vaesmcq_u8(state);
  825         state = vaeseq_u8(state, key7);
  826         state = vaesmcq_u8(state);
  827         state = vaeseq_u8(state, key8);
  828         state = vaesmcq_u8(state);
  829         state = vaeseq_u8(state, key9);
  830         state = vaesmcq_u8(state);
  831         state = vaeseq_u8(state, key10);
  832         state = vaesmcq_u8(state);
  833         state = vaeseq_u8(state, key11);
  834         state = vaesmcq_u8(state);
  835         state = vaeseq_u8(state, key12);
  836         state = vaesmcq_u8(state);
  837         state = vaeseq_u8(state, key13);
  838         state = vaesmcq_u8(state);
  839         state = vaeseq_u8(state, key14);
  840         /* AddRoundKey */
  841         state = veorq_u8(state, key15);
  842 
  843 #if defined(HAVE_UNALIGNED_ACCESS)
  844         vst1q_u8(output, state);
  845 #else
  846         if ((uintptr_t)output & 0x7) {
  847             vst1q_u8(__builtin_assume_aligned(buf, 16), state);
  848             memcpy(output, buf, 16);
  849         } else {
  850             vst1q_u8(__builtin_assume_aligned(output, 8), state);
  851         }
  852 #endif
  853         output += 16;
  854     }
  855     return SECSuccess;
  856 }
  857 
  858 SECStatus
  859 arm_aes_decrypt_ecb_256(AESContext *cx, unsigned char *output,
  860                         unsigned int *outputLen,
  861                         unsigned int maxOutputLen,
  862                         const unsigned char *input,
  863                         unsigned int inputLen,
  864                         unsigned int blocksize)
  865 {
  866 #if !defined(HAVE_UNALIGNED_ACCESS)
  867     pre_align unsigned char buf[16] post_align;
  868 #endif
  869     uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
  870     uint8x16_t key11, key12, key13, key14, key15;
  871     const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
  872 
  873     if (!inputLen) {
  874         return SECSuccess;
  875     }
  876 
  877     key1 = vld1q_u8(key);
  878     key2 = vld1q_u8(key + 16);
  879     key3 = vld1q_u8(key + 32);
  880     key4 = vld1q_u8(key + 48);
  881     key5 = vld1q_u8(key + 64);
  882     key6 = vld1q_u8(key + 80);
  883     key7 = vld1q_u8(key + 96);
  884     key8 = vld1q_u8(key + 112);
  885     key9 = vld1q_u8(key + 128);
  886     key10 = vld1q_u8(key + 144);
  887     key11 = vld1q_u8(key + 160);
  888     key12 = vld1q_u8(key + 176);
  889     key13 = vld1q_u8(key + 192);
  890     key14 = vld1q_u8(key + 208);
  891     key15 = vld1q_u8(key + 224);
  892 
  893     while (inputLen > 0) {
  894         uint8x16_t state;
  895 #if defined(HAVE_UNALIGNED_ACCESS)
  896         state = vld1q_u8(input);
  897 #else
  898         if ((uintptr_t)input & 0x7) {
  899             memcpy(buf, input, 16);
  900             state = vld1q_u8(__builtin_assume_aligned(buf, 16));
  901         } else {
  902             state = vld1q_u8(__builtin_assume_aligned(input, 8));
  903         }
  904 #endif
  905         input += 16;
  906         inputLen -= 16;
  907 
  908         /* Rounds */
  909         state = vaesdq_u8(state, key15);
  910         state = vaesimcq_u8(state);
  911         state = vaesdq_u8(state, key14);
  912         state = vaesimcq_u8(state);
  913         state = vaesdq_u8(state, key13);
  914         state = vaesimcq_u8(state);
  915         state = vaesdq_u8(state, key12);
  916         state = vaesimcq_u8(state);
  917         state = vaesdq_u8(state, key11);
  918         state = vaesimcq_u8(state);
  919         state = vaesdq_u8(state, key10);
  920         state = vaesimcq_u8(state);
  921         state = vaesdq_u8(state, key9);
  922         state = vaesimcq_u8(state);
  923         state = vaesdq_u8(state, key8);
  924         state = vaesimcq_u8(state);
  925         state = vaesdq_u8(state, key7);
  926         state = vaesimcq_u8(state);
  927         state = vaesdq_u8(state, key6);
  928         state = vaesimcq_u8(state);
  929         state = vaesdq_u8(state, key5);
  930         state = vaesimcq_u8(state);
  931         state = vaesdq_u8(state, key4);
  932         state = vaesimcq_u8(state);
  933         state = vaesdq_u8(state, key3);
  934         state = vaesimcq_u8(state);
  935         state = vaesdq_u8(state, key2);
  936         /* AddRoundKey */
  937         state = veorq_u8(state, key1);
  938 
  939 #if defined(HAVE_UNALIGNED_ACCESS)
  940         vst1q_u8(output, state);
  941 #else
  942         if ((uintptr_t)output & 0x7) {
  943             vst1q_u8(__builtin_assume_aligned(buf, 16), state);
  944             memcpy(output, buf, 16);
  945         } else {
  946             vst1q_u8(__builtin_assume_aligned(output, 8), state);
  947         }
  948 #endif
  949         output += 16;
  950     }
  951 
  952     return SECSuccess;
  953 }
  954 
  955 SECStatus
  956 arm_aes_encrypt_cbc_256(AESContext *cx, unsigned char *output,
  957                         unsigned int *outputLen,
  958                         unsigned int maxOutputLen,
  959                         const unsigned char *input,
  960                         unsigned int inputLen,
  961                         unsigned int blocksize)
  962 {
  963 #if !defined(HAVE_UNALIGNED_ACCESS)
  964     pre_align unsigned char buf[16] post_align;
  965 #endif
  966     uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
  967     uint8x16_t key11, key12, key13, key14, key15;
  968     uint8x16_t iv;
  969     const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
  970 
  971     if (!inputLen) {
  972         return SECSuccess;
  973     }
  974 
  975     /* iv */
  976     iv = vld1q_u8(cx->iv);
  977 
  978     /* expanedKey */
  979     key1 = vld1q_u8(key);
  980     key2 = vld1q_u8(key + 16);
  981     key3 = vld1q_u8(key + 32);
  982     key4 = vld1q_u8(key + 48);
  983     key5 = vld1q_u8(key + 64);
  984     key6 = vld1q_u8(key + 80);
  985     key7 = vld1q_u8(key + 96);
  986     key8 = vld1q_u8(key + 112);
  987     key9 = vld1q_u8(key + 128);
  988     key10 = vld1q_u8(key + 144);
  989     key11 = vld1q_u8(key + 160);
  990     key12 = vld1q_u8(key + 176);
  991     key13 = vld1q_u8(key + 192);
  992     key14 = vld1q_u8(key + 208);
  993     key15 = vld1q_u8(key + 224);
  994 
  995     while (inputLen > 0) {
  996         uint8x16_t state;
  997 #if defined(HAVE_UNALIGNED_ACCESS)
  998         state = vld1q_u8(input);
  999 #else
 1000         if ((uintptr_t)input & 0x7) {
 1001             memcpy(buf, input, 16);
 1002             state = vld1q_u8(__builtin_assume_aligned(buf, 16));
 1003         } else {
 1004             state = vld1q_u8(__builtin_assume_aligned(input, 8));
 1005         }
 1006 #endif
 1007         input += 16;
 1008         inputLen -= 16;
 1009 
 1010         state = veorq_u8(state, iv);
 1011 
 1012         /* Rounds */
 1013         state = vaeseq_u8(state, key1);
 1014         state = vaesmcq_u8(state);
 1015         state = vaeseq_u8(state, key2);
 1016         state = vaesmcq_u8(state);
 1017         state = vaeseq_u8(state, key3);
 1018         state = vaesmcq_u8(state);
 1019         state = vaeseq_u8(state, key4);
 1020         state = vaesmcq_u8(state);
 1021         state = vaeseq_u8(state, key5);
 1022         state = vaesmcq_u8(state);
 1023         state = vaeseq_u8(state, key6);
 1024         state = vaesmcq_u8(state);
 1025         state = vaeseq_u8(state, key7);
 1026         state = vaesmcq_u8(state);
 1027         state = vaeseq_u8(state, key8);
 1028         state = vaesmcq_u8(state);
 1029         state = vaeseq_u8(state, key9);
 1030         state = vaesmcq_u8(state);
 1031         state = vaeseq_u8(state, key10);
 1032         state = vaesmcq_u8(state);
 1033         state = vaeseq_u8(state, key11);
 1034         state = vaesmcq_u8(state);
 1035         state = vaeseq_u8(state, key12);
 1036         state = vaesmcq_u8(state);
 1037         state = vaeseq_u8(state, key13);
 1038         state = vaesmcq_u8(state);
 1039         state = vaeseq_u8(state, key14);
 1040         /* AddRoundKey */
 1041         state = veorq_u8(state, key15);
 1042 
 1043 #if defined(HAVE_UNALIGNED_ACCESS)
 1044         vst1q_u8(output, state);
 1045 #else
 1046         if ((uintptr_t)output & 0x7) {
 1047             vst1q_u8(__builtin_assume_aligned(buf, 16), state);
 1048             memcpy(output, buf, 16);
 1049         } else {
 1050             vst1q_u8(__builtin_assume_aligned(output, 8), state);
 1051         }
 1052 #endif
 1053         output += 16;
 1054         iv = state;
 1055     }
 1056     vst1q_u8(cx->iv, iv);
 1057 
 1058     return SECSuccess;
 1059 }
 1060 
 1061 SECStatus
 1062 arm_aes_decrypt_cbc_256(AESContext *cx, unsigned char *output,
 1063                         unsigned int *outputLen,
 1064                         unsigned int maxOutputLen,
 1065                         const unsigned char *input,
 1066                         unsigned int inputLen,
 1067                         unsigned int blocksize)
 1068 {
 1069 #if !defined(HAVE_UNALIGNED_ACCESS)
 1070     pre_align unsigned char buf[16] post_align;
 1071 #endif
 1072     uint8x16_t iv;
 1073     uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
 1074     uint8x16_t key11, key12, key13, key14, key15;
 1075     const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
 1076 
 1077     if (!inputLen) {
 1078         return SECSuccess;
 1079     }
 1080 
 1081     /* iv */
 1082     iv = vld1q_u8(cx->iv);
 1083 
 1084     /* expanedKey */
 1085     key1 = vld1q_u8(key);
 1086     key2 = vld1q_u8(key + 16);
 1087     key3 = vld1q_u8(key + 32);
 1088     key4 = vld1q_u8(key + 48);
 1089     key5 = vld1q_u8(key + 64);
 1090     key6 = vld1q_u8(key + 80);
 1091     key7 = vld1q_u8(key + 96);
 1092     key8 = vld1q_u8(key + 112);
 1093     key9 = vld1q_u8(key + 128);
 1094     key10 = vld1q_u8(key + 144);
 1095     key11 = vld1q_u8(key + 160);
 1096     key12 = vld1q_u8(key + 176);
 1097     key13 = vld1q_u8(key + 192);
 1098     key14 = vld1q_u8(key + 208);
 1099     key15 = vld1q_u8(key + 224);
 1100 
 1101     while (inputLen > 0) {
 1102         uint8x16_t state, old_state;
 1103 #if defined(HAVE_UNALIGNED_ACCESS)
 1104         state = vld1q_u8(input);
 1105 #else
 1106         if ((uintptr_t)input & 0x7) {
 1107             memcpy(buf, input, 16);
 1108             state = vld1q_u8(__builtin_assume_aligned(buf, 16));
 1109         } else {
 1110             state = vld1q_u8(__builtin_assume_aligned(input, 8));
 1111         }
 1112 #endif
 1113         old_state = state;
 1114         input += 16;
 1115         inputLen -= 16;
 1116 
 1117         /* Rounds */
 1118         state = vaesdq_u8(state, key15);
 1119         state = vaesimcq_u8(state);
 1120         state = vaesdq_u8(state, key14);
 1121         state = vaesimcq_u8(state);
 1122         state = vaesdq_u8(state, key13);
 1123         state = vaesimcq_u8(state);
 1124         state = vaesdq_u8(state, key12);
 1125         state = vaesimcq_u8(state);
 1126         state = vaesdq_u8(state, key11);
 1127         state = vaesimcq_u8(state);
 1128         state = vaesdq_u8(state, key10);
 1129         state = vaesimcq_u8(state);
 1130         state = vaesdq_u8(state, key9);
 1131         state = vaesimcq_u8(state);
 1132         state = vaesdq_u8(state, key8);
 1133         state = vaesimcq_u8(state);
 1134         state = vaesdq_u8(state, key7);
 1135         state = vaesimcq_u8(state);
 1136         state = vaesdq_u8(state, key6);
 1137         state = vaesimcq_u8(state);
 1138         state = vaesdq_u8(state, key5);
 1139         state = vaesimcq_u8(state);
 1140         state = vaesdq_u8(state, key4);
 1141         state = vaesimcq_u8(state);
 1142         state = vaesdq_u8(state, key3);
 1143         state = vaesimcq_u8(state);
 1144         state = vaesdq_u8(state, key2);
 1145         /* AddRoundKey */
 1146         state = veorq_u8(state, key1);
 1147 
 1148         state = veorq_u8(state, iv);
 1149 
 1150 #if defined(HAVE_UNALIGNED_ACCESS)
 1151         vst1q_u8(output, state);
 1152 #else
 1153         if ((uintptr_t)output & 0x7) {
 1154             vst1q_u8(__builtin_assume_aligned(buf, 16), state);
 1155             memcpy(output, buf, 16);
 1156         } else {
 1157             vst1q_u8(__builtin_assume_aligned(output, 8), state);
 1158         }
 1159 #endif
 1160         output += 16;
 1161 
 1162         iv = old_state;
 1163     }
 1164     vst1q_u8(cx->iv, iv);
 1165 
 1166     return SECSuccess;
 1167 }
 1168 
 1169 #endif