"Fossies" - the Fresh Open Source Software Archive

Member "src/Crypto/AesSmall_x86.asm" (10 Oct 2018, 37513 Bytes) of package /windows/misc/VeraCrypt_1.23-Hotfix-2_Source.zip:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Generic Assembler source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively you can here view or download the uninterpreted source code file.

    1 
    2 ; ---------------------------------------------------------------------------
    3 ; Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
    4 ;
    5 ; LICENSE TERMS
    6 ;
    7 ; The free distribution and use of this software is allowed (with or without
    8 ; changes) provided that:
    9 ;
   10 ;  1. source code distributions include the above copyright notice, this
   11 ;     list of conditions and the following disclaimer;
   12 ;
   13 ;  2. binary distributions include the above copyright notice, this list
   14 ;     of conditions and the following disclaimer in their documentation;
   15 ;
   16 ;  3. the name of the copyright holder is not used to endorse products
   17 ;     built using this software without specific written permission.
   18 ;
   19 ; DISCLAIMER
   20 ;
   21 ; This software is provided 'as is' with no explicit or implied warranties
   22 ; in respect of its properties, including, but not limited to, correctness
   23 ; and/or fitness for purpose.
   24 ; ---------------------------------------------------------------------------
   25 ; Issue 20/12/2007
   26 ;
   27 ; This code requires either ASM_X86_V2 or ASM_X86_V2C to be set in aesopt.h
   28 ; and the same define to be set here as well. If ASM_X86_V2C is set this file
   29 ; requires the C files aeskey.c and aestab.c for support.
   30 
   31 ; An AES implementation for x86 processors using the YASM (or NASM) assembler.
   32 ; This is a full assembler implementation covering encryption, decryption and
   33 ; key scheduling. It uses 2k bytes of tables but its encryption and decryption
   34 ; performance is very close to that obtained using large tables.  Key schedule
   35 ; expansion is slower for both encryption and decryption but this is likely to
   36 ; be offset by the much smaller load that this version places on the processor
   37 ; cache. I acknowledge the contribution made by Daniel Bernstein to aspects of
   38 ; the design of the AES round function used here.
   39 ;
   40 ; This code provides the standard AES block size (128 bits, 16 bytes) and the
   41 ; three standard AES key sizes (128, 192 and 256 bits). It has the same call
   42 ; interface as my C implementation. The ebx, esi, edi and ebp registers are
   43 ; preserved across calls but eax, ecx and edx and the arithmetic status flags
   44 ; are not.  Although this is a full assembler implementation, it can be used
   45 ; in conjunction with my C code which provides faster key scheduling using
   46 ; large tables. In this case aeskey.c should be compiled with ASM_X86_V2C
   47 ; defined.  It is also important that the defines below match those used in the
   48 ; C code.  This code uses the VC++ register saving conventions; if it is used
   49 ; with another compiler, conventions for using and saving registers may need
   50 ; to be checked (and calling conventions).  The YASM command line for the VC++
   51 ; custom build step is:
   52 ;
   53 ;    yasm -Xvc -f win32 -D <Z> -o "$(TargetDir)\$(InputName).obj" "$(InputPath)"
   54 ;
   55 ; For the cryptlib build this is (pcg):
   56 ;
   57 ;   yasm -Xvc -f win32 -D ASM_X86_V2C -o aescrypt2.obj aes_x86_v2.asm
   58 ;
   59 ; where <Z> is ASM_X86_V2 or ASM_X86_V2C.  The calling interfaces are:
   60 ;
   61 ;     AES_RETURN aes_encrypt(const unsigned char in_blk[],
   62 ;                   unsigned char out_blk[], const aes_encrypt_ctx cx[1]);
   63 ;
   64 ;     AES_RETURN aes_decrypt(const unsigned char in_blk[],
   65 ;                   unsigned char out_blk[], const aes_decrypt_ctx cx[1]);
   66 ;
   67 ;     AES_RETURN aes_encrypt_key<NNN>(const unsigned char key[],
   68 ;                                            const aes_encrypt_ctx cx[1]);
   69 ;
   70 ;     AES_RETURN aes_decrypt_key<NNN>(const unsigned char key[],
   71 ;                                            const aes_decrypt_ctx cx[1]);
   72 ;
   73 ;     AES_RETURN aes_encrypt_key(const unsigned char key[],
   74 ;                           unsigned int len, const aes_encrypt_ctx cx[1]);
   75 ;
   76 ;     AES_RETURN aes_decrypt_key(const unsigned char key[],
   77 ;                           unsigned int len, const aes_decrypt_ctx cx[1]);
   78 ;
   79 ; where <NNN> is 128, 192 or 256.  In the last two calls the length can be in
   80 ; either bits or bytes.
   81 
   82 ; The DLL interface must use the _stdcall convention in which the number
   83 ; of bytes of parameter space is added after an @ to the subroutine's name.
   84 ; We must also remove our parameters from the stack before return (see
   85 ; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version.
   86 
   87 ;
   88 ; Adapted for TrueCrypt:
   89 ; - All tables generated at run-time
   90 ; - Adapted for 16-bit environment
   91 ;
   92 
   93 CPU 386                 ; allow 80386 instructions (32-bit registers are used throughout)
   94 USE16                   ; assemble for 16-bit operating mode
   95 SEGMENT _TEXT PUBLIC CLASS=CODE USE16
   96 SEGMENT _DATA PUBLIC CLASS=DATA USE16
   97 
   98 GROUP DGROUP _TEXT _DATA ; code and data segments are placed in the same group
   99 
  100 extern _aes_dec_tab     ; Aestab.c
  101 extern _aes_enc_tab     ; both lookup tables are defined outside this file
  102 
  103 ; %define DLL_EXPORT
  104 
  105 ; The size of the code can be reduced by using functions for the encryption
  106 ; and decryption rounds in place of macro expansion
  107 
  108 %define REDUCE_CODE_SIZE        ; rounds and helpers become routines reached through mf_call
  109 
  110 ; Comment in/out the following lines to obtain the desired subroutines. These
  111 ; selections MUST match those in the C header file aes.h
  112 
  113 ; %define AES_128                 ; define if AES with 128 bit keys is needed
  114 ; %define AES_192                 ; define if AES with 192 bit keys is needed
  115 %define AES_256                 ; define if AES with 256 bit keys is needed
  116 ; %define AES_VAR                 ; define if a variable key size is needed
  117 %define ENCRYPTION              ; define if encryption is needed
  118 %define DECRYPTION              ; define if decryption is needed
  119 ; %define AES_REV_DKS             ; define if key decryption schedule is reversed
  120 ; The key expansion routines are assembled only when ASM_X86_V2C is not defined
  121 %ifndef ASM_X86_V2C
  122 %define ENCRYPTION_KEY_SCHEDULE ; define if encryption key expansion is needed
  123 %define DECRYPTION_KEY_SCHEDULE ; define if decryption key expansion is needed
  124 %endif
  125 
  126 ; The encryption key schedule has the following in memory layout where N is the
  127 ; number of rounds (10, 12 or 14):
  128 ;
  129 ; lo: | input key (round 0)  |  ; each round is four 32-bit words
  130 ;     | encryption round 1   |
  131 ;     | encryption round 2   |
  132 ;     ....
  133 ;     | encryption round N-1 |
  134 ; hi: | encryption round N   |
  135 ;
  136 ; The decryption key schedule is normally set up so that it has the same
  137 ; layout as above by actually reversing the order of the encryption key
  138 ; schedule in memory (this happens when AES_REV_DKS is set):
  139 ;
  140 ; lo: | decryption round 0   | =              | encryption round N   |
  141 ;     | decryption round 1   | = INV_MIX_COL[ | encryption round N-1 | ]
  142 ;     | decryption round 2   | = INV_MIX_COL[ | encryption round N-2 | ]
  143 ;     ....                       ....
  144 ;     | decryption round N-1 | = INV_MIX_COL[ | encryption round 1   | ]
  145 ; hi: | decryption round N   | =              | input key (round 0)  |
  146 ;
  147 ; with rounds except the first and last modified using inv_mix_column()
  148 ; But if AES_REV_DKS is NOT set the order of keys is left as it is for
  149 ; encryption so that it has to be accessed in reverse when used for
  150 ; decryption (although the inverse mix column modifications are done)
  151 ;
  152 ; lo: | decryption round 0   | =              | input key (round 0)  |
  153 ;     | decryption round 1   | = INV_MIX_COL[ | encryption round 1   | ]
  154 ;     | decryption round 2   | = INV_MIX_COL[ | encryption round 2   | ]
  155 ;     ....                       ....
  156 ;     | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ]
  157 ; hi: | decryption round N   | =              | encryption round N   |
  158 ;
  159 ; This layout is faster when the assembler key scheduling provided here
  160 ; is used.
  161 ;
  162 ; End of user defines
  163 
  164 %ifdef AES_VAR
  165 %ifndef AES_128
  166 %define AES_128
  167 %endif
  168 %ifndef AES_192
  169 %define AES_192
  170 %endif
  171 %ifndef AES_256
  172 %define AES_256
  173 %endif
  174 %endif
  175 ; AES_VAR (above) turns on all three key sizes. KS_LENGTH (below) is the key schedule size in 32-bit words for the largest enabled key size; the byte at offset 4*KS_LENGTH of the context holds 16 * rounds
  176 %ifdef AES_VAR
  177 %define KS_LENGTH       60
  178 %elifdef AES_256
  179 %define KS_LENGTH       60
  180 %elifdef AES_192
  181 %define KS_LENGTH       52
  182 %else
  183 %define KS_LENGTH       44
  184 %endif
  185 
  186 ; These macros implement stack based local variables
  187 
  188 %macro  save 2
  189     mov     [esp+4*%1],%2       ; save <slot>,<reg>: store reg in dword slot number <slot> above esp
  190 %endmacro
  191 
  192 %macro  restore 2
  193     mov     %1,[esp+4*%2]       ; restore <reg>,<slot>: reload reg from dword slot number <slot>
  194 %endmacro
  195 
  196 %ifdef  REDUCE_CODE_SIZE
  197     %macro mf_call 1
  198         call %1                 ; the named helper exists once as a callable routine
  199     %endmacro
  200 %else
  201     %macro mf_call 1
  202         %1                      ; the named helper is a macro and is expanded in place
  203     %endmacro
  204 %endif
  205 
  206 ; the DLL has to implement the _stdcall calling interface on return
  207 ; In this case we have to take our parameters (3 4-byte pointers)
  208 ; off the stack
  209 
  210 %define parms 12                ; default argument byte count for the do_* macros. NOTE(review): the entry points in this file read 2-byte pointers at offsets 2, 4 and 6, so 12 looks too large if DLL_EXPORT were ever enabled - confirm
  211 ; do_name <label>[,<bytes>]: emit a global entry label (label@bytes, also exported, when DLL_EXPORT is defined)
  212 %macro  do_name 1-2 parms
  213 %ifndef DLL_EXPORT
  214     global  %1
  215 %1:
  216 %else
  217     global  %1@%2
  218     export  %1@%2
  219 %1@%2:
  220 %endif
  221 %endmacro
  222 ; do_call <label>[,<bytes>]: call an entry point; without DLL_EXPORT the caller removes <bytes> of arguments afterwards
  223 %macro  do_call 1-2 parms
  224 %ifndef DLL_EXPORT
  225     call    %1
  226     add     esp,%2
  227 %else
  228     call    %1@%2
  229 %endif
  230 %endmacro
  231 ; do_exit [<bytes>]: return to the caller; with DLL_EXPORT the return also pops <bytes> of arguments
  232 %macro  do_exit  0-1 parms
  233 %ifdef DLL_EXPORT
  234     ret %1
  235 %else
  236     ret
  237 %endif
  238 %endmacro
  239 
  240 ; finite field multiplies by {02}, {04} and {08}
  241 
  242 %define f2(x)   ((x<<1)^(((x>>7)&1)*0x11b))
  243 %define f4(x)   ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
  244 %define f8(x)   ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))
  245 ; above: for an 8-bit x, a suitably scaled copy of 0x11b is XORed in for every bit shifted out past bit 7
  246 ; finite field multiplies required in table generation
  247 ; below: multiplies by {03}, {09}, {0b}, {0d} and {0e}, built by XORing the power-of-two products
  248 %define f3(x)   (f2(x) ^ x)
  249 %define f9(x)   (f8(x) ^ x)
  250 %define fb(x)   (f8(x) ^ f2(x) ^ x)
  251 %define fd(x)   (f8(x) ^ f4(x) ^ x)
  252 %define fe(x)   (f8(x) ^ f4(x) ^ f2(x))
  253 
  254 %define etab_0(x)   [_aes_enc_tab+4+8*x]      ; table entries are 8 bytes apart; x must be a register holding the entry index
  255 %define etab_1(x)   [_aes_enc_tab+3+8*x]      ; etab_1..etab_3 read dwords starting 1, 2 and 3 bytes before etab_0
  256 %define etab_2(x)   [_aes_enc_tab+2+8*x]
  257 %define etab_3(x)   [_aes_enc_tab+1+8*x]
  258 %define etab_b(x)   byte [_aes_enc_tab+1+8*x] ; used with movzx for 0x000000xx
  259 %define etab_w(x)   word [_aes_enc_tab+8*x]   ; used with movzx for 0x0000xx00
  260 ; btab_0..btab_3 read dwords at entry offsets 6, 5, 4 and 3; only the disabled (%else) last-round macros use them
  261 %define btab_0(x)   [_aes_enc_tab+6+8*x]
  262 %define btab_1(x)   [_aes_enc_tab+5+8*x]
  263 %define btab_2(x)   [_aes_enc_tab+4+8*x]
  264 %define btab_3(x)   [_aes_enc_tab+3+8*x]
  265 
  266 ; ROUND FUNCTION.  Build column[2] on ESI and column[3] on EDI that have the
  267 ; round keys pre-loaded. Build column[0] in EBP and column[1] in EBX.
  268 ;
  269 ; Input:
  270 ;
  271 ;   EAX     column[0]
  272 ;   EBX     column[1]
  273 ;   ECX     column[2]
  274 ;   EDX     column[3]
  275 ;   ESI     column key[round][2]
  276 ;   EDI     column key[round][3]
  277 ;   EBP     scratch
  278 ;
  279 ; Output:
  280 ;
  281 ;   EBP     column[0]   unkeyed
  282 ;   EBX     column[1]   unkeyed
  283 ;   ESI     column[2]   keyed
  284 ;   EDI     column[3]   keyed
  285 ;   EAX     scratch
  286 ;   ECX     scratch
  287 ;   EDX     scratch
  288 
  289 %macro rnd_fun 2
  290 ; %1 = macro that XORs a table lookup into a register, %2 = macro that loads a table lookup into a register
  291     rol     ebx,16              ; bl/bh now hold bytes 2 and 3 of column[1]
  292     %1      esi, cl, 0, ebp
  293     %1      esi, dh, 1, ebp
  294     %1      esi, bh, 3, ebp
  295     %1      edi, dl, 0, ebp
  296     %1      edi, ah, 1, ebp
  297     %1      edi, bl, 2, ebp
  298     %2      ebp, al, 0, ebp     ; start output column[0] from byte 0 of input column[0]
  299     shr     ebx,16              ; ebx = low word (bytes 0 and 1) of column[1]
  300     and     eax,0xffff0000
  301     or      eax,ebx             ; eax = high word of column[0] : low word of column[1]
  302     shr     edx,16              ; dl/dh = bytes 2 and 3 of column[3]
  303     %1      ebp, ah, 1, ebx
  304     %1      ebp, dh, 3, ebx
  305     %2      ebx, dl, 2, ebx     ; start output column[1]; ebx is free now that its bytes live in eax
  306     %1      ebx, ch, 1, edx
  307     %1      ebx, al, 0, edx
  308     shr     eax,16              ; al/ah = bytes 2 and 3 of column[0]
  309     shr     ecx,16              ; cl/ch = bytes 2 and 3 of column[2]
  310     %1      ebp, cl, 2, edx
  311     %1      edi, ch, 3, edx
  312     %1      esi, al, 2, edx
  313     %1      ebx, ah, 3, edx
  314 
  315 %endmacro
  316 
  317 ; Basic MOV and XOR Operations for normal rounds
  318 
  319 %macro  nr_xor  4
  320     movzx   %4,%2               ; %4 (index register) = zero-extended source byte %2
  321     xor     %1,etab_%3(%4)      ; XOR the dword read through accessor etab_<%3> into %1
  322 %endmacro
  323 
  324 %macro  nr_mov  4
  325     movzx   %4,%2               ; %4 (index register) = zero-extended source byte %2
  326     mov     %1,etab_%3(%4)      ; load the dword read through accessor etab_<%3> into %1
  327 %endmacro
  328 
  329 ; Basic MOV and XOR Operations for last round
  330 
  331 %if 1
  332 ; active variant: look up one table byte with etab_b and shift it to byte position %3
  333     %macro  lr_xor  4
  334         movzx   %4,%2
  335         movzx   %4,etab_b(%4)
  336     %if %3 != 0
  337         shl     %4,8*%3
  338     %endif
  339         xor     %1,%4           ; XOR the positioned byte into %1
  340     %endmacro
  341 
  342     %macro  lr_mov  4
  343         movzx   %4,%2
  344         movzx   %1,etab_b(%4)
  345     %if %3 != 0
  346         shl     %1,8*%3         ; %1 = table byte placed at byte position %3
  347     %endif
  348     %endmacro
  349 
  350 %else       ; less effective but worth leaving as an option
  351 ; inactive variant: read a dword through btab_<%3> and mask out the single byte at position %3
  352     %macro  lr_xor  4
  353         movzx   %4,%2
  354         mov     %4,btab_%3(%4)
  355         and     %4,0x000000ff << 8 * %3
  356         xor     %1,%4
  357     %endmacro
  358 
  359     %macro  lr_mov  4
  360         movzx   %4,%2
  361         mov     %1,btab_%3(%4)
  362         and     %1,0x000000ff << 8 * %3
  363     %endmacro
  364 
  365 %endif
  366 
  367 ; Apply the etab_b byte lookup to the 4 bytes of EAX and XOR the result with EDX
  368 ; (no rotation is done here; f_key rotates EAX itself before calling when it needs to)
  369 %ifdef REDUCE_CODE_SIZE
  370 
  371 l3s_col:
  372     movzx   ecx,al              ; in:  eax = word to substitute, edx = value to XOR with
  373     movzx   ecx, etab_b(ecx)    ; out: eax = edx ^ substituted word
  374     xor     edx,ecx             ; scratch ecx,edx
  375     movzx   ecx,ah
  376     movzx   ecx, etab_b(ecx)
  377     shl     ecx,8               ; byte 1 goes back to byte position 1
  378     xor     edx,ecx
  379     shr     eax,16              ; bring bytes 2 and 3 down into al/ah
  380     movzx   ecx,al
  381     movzx   ecx, etab_b(ecx)
  382     shl     ecx,16
  383     xor     edx,ecx
  384     movzx   ecx,ah
  385     movzx   ecx, etab_b(ecx)
  386     shl     ecx,24
  387     xor     edx,ecx
  388     mov     eax,edx             ; return the accumulated value in eax
  389     ret
  390 
  391 %else
  392 
  393 %macro l3s_col 0
  394 
  395     movzx   ecx,al              ; in:  eax = word to substitute, edx = value to XOR with
  396     movzx   ecx, etab_b(ecx)    ; out: eax = edx ^ substituted word
  397     xor     edx,ecx             ; scratch ecx,edx
  398     movzx   ecx,ah
  399     movzx   ecx, etab_b(ecx)
  400     shl     ecx,8               ; byte 1 goes back to byte position 1
  401     xor     edx,ecx
  402     shr     eax,16              ; bring bytes 2 and 3 down into al/ah
  403     movzx   ecx,al
  404     movzx   ecx, etab_b(ecx)
  405     shl     ecx,16
  406     xor     edx,ecx
  407     movzx   ecx,ah
  408     movzx   ecx, etab_b(ecx)
  409     shl     ecx,24
  410     xor     edx,ecx
  411     mov     eax,edx             ; leave the accumulated value in eax
  412 
  413 %endmacro
  414 
  415 %endif
  416 
  417 ; offsets to parameters
  418 
  419 in_blk  equ     2   ; input byte array address parameter (2-byte pointer, 2 bytes above sp at entry)
  420 out_blk equ     4   ; output byte array address parameter (2-byte pointer)
  421 ctx     equ     6   ; AES context structure (2-byte pointer)
  422 stk_spc equ    20   ; stack space: dword slot 0 = round key pointer, slots 1-4 = saved edi, esi, ebx, ebp
  423 
  424 %ifdef  ENCRYPTION
  425 
  426 ; %define ENCRYPTION_TABLE
  427 
  428 %ifdef REDUCE_CODE_SIZE
  429 ; One normal encryption round. In: eax,ebx,ecx,edx = state columns 0-3, ebp = current round key. Out: same registers updated, ebp advanced by 16
  430 enc_round:
  431     sub     sp, 2               ; 2-byte return address + these 2 bytes: slot 1 here is the caller's slot 0
  432     add     ebp,16              ; step to the next round key
  433     save    1,ebp
  434     mov     esi,[ebp+8]         ; preload key words 2 and 3; rnd_fun XORs columns 2 and 3 into them
  435     mov     edi,[ebp+12]
  436 
  437     rnd_fun nr_xor, nr_mov
  438 
  439     mov     eax,ebp             ; rnd_fun leaves column 0 in ebp, column 1 in ebx
  440     mov     ecx,esi
  441     mov     edx,edi
  442     restore ebp,1
  443     xor     eax,[ebp]           ; key words 0 and 1 are applied after the table lookups
  444     xor     ebx,[ebp+4]
  445     add     sp, 2
  446     ret
  447 
  448 %else
  449 ; inline form of the same round; it runs in the caller's frame so it uses slot 0 directly
  450 %macro enc_round 0
  451 
  452     add     ebp,16              ; step to the next round key
  453     save    0,ebp
  454     mov     esi,[ebp+8]         ; preload key words 2 and 3
  455     mov     edi,[ebp+12]
  456 
  457     rnd_fun nr_xor, nr_mov
  458 
  459     mov     eax,ebp             ; rnd_fun leaves column 0 in ebp, column 1 in ebx
  460     mov     ecx,esi
  461     mov     edx,edi
  462     restore ebp,0
  463     xor     eax,[ebp]           ; key words 0 and 1 are applied after the table lookups
  464     xor     ebx,[ebp+4]
  465 
  466 %endmacro
  467 
  468 %endif
  469 
  470 %macro enc_last_round 0
  471 ; final encryption round, always expanded inline; leaves the four output columns in eax, ebx, esi, edi
  472     add     ebp,16              ; step to the last round key
  473     save    0,ebp
  474     mov     esi,[ebp+8]         ; preload key words 2 and 3
  475     mov     edi,[ebp+12]
  476 
  477     rnd_fun lr_xor, lr_mov
  478 
  479     mov     eax,ebp             ; column 0 was built in ebp
  480     restore ebp,0
  481     xor     eax,[ebp]           ; apply key words 0 and 1
  482     xor     ebx,[ebp+4]
  483 
  484 %endmacro
  485 
  486     section _TEXT
  487 
  488 ; AES Encryption Subroutine
  489 ; Reads 16 bytes through the in_blk pointer, writes 16 bytes through the out_blk pointer and returns 0 in eax (-1 if the stored round count is not recognised)
  490     do_name _aes_encrypt,12
  491 
  492     mov     ax, sp
  493     movzx   esp, ax             ; clear the upper half of esp so [esp+..] addressing can be used
  494 
  495     sub     esp,stk_spc
  496     mov     [esp+16],ebp        ; preserve the registers this routine promises to keep
  497     mov     [esp+12],ebx
  498     mov     [esp+ 8],esi
  499     mov     [esp+ 4],edi
  500 
  501     movzx   esi,word [esp+in_blk+stk_spc] ; input pointer
  502     mov     eax,[esi   ]
  503     mov     ebx,[esi+ 4]
  504     mov     ecx,[esi+ 8]
  505     mov     edx,[esi+12]
  506 
  507     movzx   ebp,word [esp+ctx+stk_spc]    ; key pointer
  508     movzx   edi,byte [ebp+4*KS_LENGTH]    ; 16 * number of rounds, stored after the key schedule
  509     xor     eax,[ebp   ]                  ; initial round key
  510     xor     ebx,[ebp+ 4]
  511     xor     ecx,[ebp+ 8]
  512     xor     edx,[ebp+12]
  513 
  514 ; determine the number of rounds
  515 ; NOTE(review): AES_VAR also defines AES_256, so a variable-key build would skip this dispatch and always run 14 rounds - confirm
  516 %ifndef AES_256
  517     cmp     edi,10*16
  518     je      .3
  519     cmp     edi,12*16
  520     je      .2
  521     cmp     edi,14*16
  522     je      .1
  523     mov     eax,-1
  524     jmp     .5
  525 %endif
  526 ; entering at .1 runs 13 normal rounds, at .2 runs 11 and at .3 runs 9, each followed by the last round
  527 .1: mf_call enc_round
  528     mf_call enc_round
  529 .2: mf_call enc_round
  530     mf_call enc_round
  531 .3: mf_call enc_round
  532     mf_call enc_round
  533     mf_call enc_round
  534     mf_call enc_round
  535     mf_call enc_round
  536     mf_call enc_round
  537     mf_call enc_round
  538     mf_call enc_round
  539     mf_call enc_round
  540     enc_last_round
  541 
  542     movzx   edx,word [esp+out_blk+stk_spc] ; output pointer
  543     mov     [edx],eax
  544     mov     [edx+4],ebx
  545     mov     [edx+8],esi
  546     mov     [edx+12],edi
  547     xor     eax,eax             ; return 0
  548 
  549 .5: mov     ebp,[esp+16]        ; common exit: restore preserved registers
  550     mov     ebx,[esp+12]
  551     mov     esi,[esp+ 8]
  552     mov     edi,[esp+ 4]
  553     add     esp,stk_spc
  554     do_exit 12
  555 
  556 %endif
  557 
  558 %macro f_key 2
  559 ; %1 = expansion step number, %2 = bytes of key schedule per step (16, 24 or 32). In: esi,edi,ecx,edx = first four words of the previous step, eax = its last word, ebp = output base
  560     push    ecx
  561     push    edx
  562     mov     edx,esi
  563     ror     eax,8               ; rotate the previous word by one byte before substitution
  564     mf_call l3s_col             ; eax = esi ^ substituted word
  565     mov     esi,eax
  566     pop     edx
  567     pop     ecx
  568     xor     esi,rc_val          ; rc_val is an assembly-time constant, doubled by f2 at the end of each step
  569 
  570     mov     [ebp+%1*%2],esi
  571     xor     edi,esi
  572     mov     [ebp+%1*%2+4],edi
  573     xor     ecx,edi
  574     mov     [ebp+%1*%2+8],ecx
  575     xor     edx,ecx
  576     mov     [ebp+%1*%2+12],edx
  577     mov     eax,edx
  578 
  579 %if %2 == 24
  580 ; 24-byte steps: two more words per step, each XORed with the word %2 bytes earlier (skipped on step 7)
  581 %if %1 < 7
  582     xor     eax,[ebp+%1*%2+16-%2]
  583     mov     [ebp+%1*%2+16],eax
  584     xor     eax,[ebp+%1*%2+20-%2]
  585     mov     [ebp+%1*%2+20],eax
  586 %endif
  587 
  588 %elif %2 == 32
  589 ; 32-byte steps: four more words per step, the first going through l3s_col without rotation (skipped on step 6)
  590 %if %1 < 6
  591     push    ecx
  592     push    edx
  593     mov     edx,[ebp+%1*%2+16-%2]
  594     mf_call l3s_col
  595     pop     edx
  596     pop     ecx
  597     mov     [ebp+%1*%2+16],eax
  598     xor     eax,[ebp+%1*%2+20-%2]
  599     mov     [ebp+%1*%2+20],eax
  600     xor     eax,[ebp+%1*%2+24-%2]
  601     mov     [ebp+%1*%2+24],eax
  602     xor     eax,[ebp+%1*%2+28-%2]
  603     mov     [ebp+%1*%2+28],eax
  604 %endif
  605 
  606 %endif
  607 
  608 %assign rc_val f2(rc_val)
  609 
  610 %endmacro
  611 
  612 %ifdef ENCRYPTION_KEY_SCHEDULE
  613 
  614 %ifdef  AES_128
  615 
  616 %ifndef ENCRYPTION_TABLE
  617 ; %define ENCRYPTION_TABLE
  618 %endif
  619 
  620 %assign rc_val  1
  621 ; Expands a 16-byte key into the context and stores 10*16 after the schedule; returns 0 in eax
  622     do_name _aes_encrypt_key128,8
  623 ; NOTE(review): unlike _aes_encrypt_key256 this routine does not zero-extend sp and still reads dword arguments at [esp+20]/[esp+24]; it looks unadapted for the 16-bit build (AES_128 is disabled above) - confirm before enabling
  624     push    ebp
  625     push    ebx
  626     push    esi
  627     push    edi
  628 
  629     mov     ebp,[esp+24]        ; context (key schedule) pointer
  630     mov     [ebp+4*KS_LENGTH],dword 10*16
  631     mov     ebx,[esp+20]        ; key pointer
  632 
  633     mov     esi,[ebx]           ; the first round key is the user key itself
  634     mov     [ebp],esi
  635     mov     edi,[ebx+4]
  636     mov     [ebp+4],edi
  637     mov     ecx,[ebx+8]
  638     mov     [ebp+8],ecx
  639     mov     edx,[ebx+12]
  640     mov     [ebp+12],edx
  641     add     ebp,16
  642     mov     eax,edx             ; f_key expects the last key word in eax
  643 
  644     f_key   0,16        ; 11 * 4 = 44 unsigned longs
  645     f_key   1,16        ; 4 + 4 * 10 generated = 44
  646     f_key   2,16
  647     f_key   3,16
  648     f_key   4,16
  649     f_key   5,16
  650     f_key   6,16
  651     f_key   7,16
  652     f_key   8,16
  653     f_key   9,16
  654 
  655     pop     edi
  656     pop     esi
  657     pop     ebx
  658     pop     ebp
  659     xor     eax,eax
  660     do_exit  8
  661 
  662 %endif
  663 
  664 %ifdef  AES_192
  665 
  666 %ifndef ENCRYPTION_TABLE
  667 ; %define ENCRYPTION_TABLE
  668 %endif
  669 
  670 %assign rc_val  1
  671 ; Expands a 24-byte key into the context and stores 12*16 after the schedule; returns 0 in eax
  672     do_name _aes_encrypt_key192,8
  673 ; NOTE(review): unlike _aes_encrypt_key256 this routine does not zero-extend sp and still reads dword arguments at [esp+20]/[esp+24]; it looks unadapted for the 16-bit build (AES_192 is disabled above) - confirm before enabling
  674     push    ebp
  675     push    ebx
  676     push    esi
  677     push    edi
  678 
  679     mov     ebp,[esp+24]        ; context (key schedule) pointer
  680     mov     [ebp+4*KS_LENGTH],dword 12 * 16
  681     mov     ebx,[esp+20]        ; key pointer
  682 
  683     mov     esi,[ebx]           ; copy the six user key words to the start of the schedule
  684     mov     [ebp],esi
  685     mov     edi,[ebx+4]
  686     mov     [ebp+4],edi
  687     mov     ecx,[ebx+8]
  688     mov     [ebp+8],ecx
  689     mov     edx,[ebx+12]
  690     mov     [ebp+12],edx
  691     mov     eax,[ebx+16]
  692     mov     [ebp+16],eax
  693     mov     eax,[ebx+20]        ; eax keeps the last key word for f_key
  694     mov     [ebp+20],eax
  695     add     ebp,24
  696 
  697     f_key   0,24        ; 13 * 4 = 52 unsigned longs
  698     f_key   1,24        ; 6 + 6 * 8 generated = 54
  699     f_key   2,24
  700     f_key   3,24
  701     f_key   4,24
  702     f_key   5,24
  703     f_key   6,24
  704     f_key   7,24
  705 
  706     pop     edi
  707     pop     esi
  708     pop     ebx
  709     pop     ebp
  710     xor     eax,eax
  711     do_exit  8
  712 
  713 %endif
  714 
  715 %ifdef  AES_256
  716 
  717 %ifndef ENCRYPTION_TABLE
  718 ; %define ENCRYPTION_TABLE
  719 %endif
  720 
  721 %assign rc_val  1
  722 ; Expands a 32-byte key into the context and stores 14*16 after the schedule; returns 0 in eax
  723     do_name _aes_encrypt_key256,8
  724 
  725     mov     ax, sp
  726     movzx   esp, ax             ; clear the upper half of esp so [esp+..] addressing can be used
  727 
  728     push    ebp
  729     push    ebx
  730     push    esi
  731     push    edi
  732 ; 16 bytes of saved registers plus a 2-byte return address put the 2-byte arguments at +18 and +20
  733     movzx   ebp, word [esp+20] ; ks
  734     mov     [ebp+4*KS_LENGTH],dword 14 * 16
  735     movzx   ebx, word [esp+18] ; key
  736 
  737     mov     esi,[ebx]           ; copy the eight user key words to the start of the schedule
  738     mov     [ebp],esi
  739     mov     edi,[ebx+4]
  740     mov     [ebp+4],edi
  741     mov     ecx,[ebx+8]
  742     mov     [ebp+8],ecx
  743     mov     edx,[ebx+12]
  744     mov     [ebp+12],edx
  745     mov     eax,[ebx+16]
  746     mov     [ebp+16],eax
  747     mov     eax,[ebx+20]
  748     mov     [ebp+20],eax
  749     mov     eax,[ebx+24]
  750     mov     [ebp+24],eax
  751     mov     eax,[ebx+28]        ; eax keeps the last key word for f_key
  752     mov     [ebp+28],eax
  753     add     ebp,32
  754 
  755     f_key   0,32        ; 15 * 4 = 60 unsigned longs
  756     f_key   1,32        ; 8 + 8 * 7 generated = 64
  757     f_key   2,32
  758     f_key   3,32
  759     f_key   4,32
  760     f_key   5,32
  761     f_key   6,32        ; step 6 writes only four words (see the %1 < 6 test in f_key)
  762 
  763     pop     edi
  764     pop     esi
  765     pop     ebx
  766     pop     ebp
  767     xor     eax,eax
  768     do_exit  8
  769 
  770 %endif
  771 
  772 %ifdef  AES_VAR
  773 
  774 %ifndef ENCRYPTION_TABLE
  775 ; %define ENCRYPTION_TABLE
  776 %endif
  777 ; Dispatches on the key length (given in bytes or bits) to the fixed-size key routines; returns -1 in eax for any other length
  778     do_name _aes_encrypt_key,12
  779 ; NOTE(review): arguments are read as dwords at [esp+4..12], which does not match the 2-byte arguments _aes_encrypt_key256 reads; looks unadapted for the 16-bit build (AES_VAR is disabled above) - confirm
  780     mov     ecx,[esp+4]         ; key pointer
  781     mov     eax,[esp+8]         ; key length
  782     mov     edx,[esp+12]        ; context pointer
  783     push    edx                 ; re-push (key, context) as arguments for the fixed-size routine
  784     push    ecx
  785 
  786     cmp     eax,16
  787     je      .1
  788     cmp     eax,128
  789     je      .1
  790 
  791     cmp     eax,24
  792     je      .2
  793     cmp     eax,192
  794     je      .2
  795 
  796     cmp     eax,32
  797     je      .3
  798     cmp     eax,256
  799     je      .3
  800     mov     eax,-1
  801     add     esp,8               ; drop the two arguments pushed above
  802     do_exit 12
  803 
  804 .1: do_call _aes_encrypt_key128,8
  805     do_exit 12
  806 .2: do_call _aes_encrypt_key192,8
  807     do_exit 12
  808 .3: do_call _aes_encrypt_key256,8
  809     do_exit 12
  810 
  811 %endif
  812 
  813 %endif
  814 
  815 %ifdef ENCRYPTION_TABLE
  816 
  817 ; S-box data - 256 entries
  818 
  819     section _DATA
  820 
  821 %define u8(x)   0, x, x, f3(x), f2(x), x, x, f3(x) ; one 8-byte table entry for S-box value x; the etab_*/btab_* accessors read overlapping dwords inside it
  822 
  823 _aes_enc_tab:
  824     db  u8(0x63),u8(0x7c),u8(0x77),u8(0x7b),u8(0xf2),u8(0x6b),u8(0x6f),u8(0xc5)
  825     db  u8(0x30),u8(0x01),u8(0x67),u8(0x2b),u8(0xfe),u8(0xd7),u8(0xab),u8(0x76)
  826     db  u8(0xca),u8(0x82),u8(0xc9),u8(0x7d),u8(0xfa),u8(0x59),u8(0x47),u8(0xf0)
  827     db  u8(0xad),u8(0xd4),u8(0xa2),u8(0xaf),u8(0x9c),u8(0xa4),u8(0x72),u8(0xc0)
  828     db  u8(0xb7),u8(0xfd),u8(0x93),u8(0x26),u8(0x36),u8(0x3f),u8(0xf7),u8(0xcc)
  829     db  u8(0x34),u8(0xa5),u8(0xe5),u8(0xf1),u8(0x71),u8(0xd8),u8(0x31),u8(0x15)
  830     db  u8(0x04),u8(0xc7),u8(0x23),u8(0xc3),u8(0x18),u8(0x96),u8(0x05),u8(0x9a)
  831     db  u8(0x07),u8(0x12),u8(0x80),u8(0xe2),u8(0xeb),u8(0x27),u8(0xb2),u8(0x75)
  832     db  u8(0x09),u8(0x83),u8(0x2c),u8(0x1a),u8(0x1b),u8(0x6e),u8(0x5a),u8(0xa0)
  833     db  u8(0x52),u8(0x3b),u8(0xd6),u8(0xb3),u8(0x29),u8(0xe3),u8(0x2f),u8(0x84)
  834     db  u8(0x53),u8(0xd1),u8(0x00),u8(0xed),u8(0x20),u8(0xfc),u8(0xb1),u8(0x5b)
  835     db  u8(0x6a),u8(0xcb),u8(0xbe),u8(0x39),u8(0x4a),u8(0x4c),u8(0x58),u8(0xcf)
  836     db  u8(0xd0),u8(0xef),u8(0xaa),u8(0xfb),u8(0x43),u8(0x4d),u8(0x33),u8(0x85)
  837     db  u8(0x45),u8(0xf9),u8(0x02),u8(0x7f),u8(0x50),u8(0x3c),u8(0x9f),u8(0xa8)
  838     db  u8(0x51),u8(0xa3),u8(0x40),u8(0x8f),u8(0x92),u8(0x9d),u8(0x38),u8(0xf5)
  839     db  u8(0xbc),u8(0xb6),u8(0xda),u8(0x21),u8(0x10),u8(0xff),u8(0xf3),u8(0xd2)
  840     db  u8(0xcd),u8(0x0c),u8(0x13),u8(0xec),u8(0x5f),u8(0x97),u8(0x44),u8(0x17)
  841     db  u8(0xc4),u8(0xa7),u8(0x7e),u8(0x3d),u8(0x64),u8(0x5d),u8(0x19),u8(0x73)
  842     db  u8(0x60),u8(0x81),u8(0x4f),u8(0xdc),u8(0x22),u8(0x2a),u8(0x90),u8(0x88)
  843     db  u8(0x46),u8(0xee),u8(0xb8),u8(0x14),u8(0xde),u8(0x5e),u8(0x0b),u8(0xdb)
  844     db  u8(0xe0),u8(0x32),u8(0x3a),u8(0x0a),u8(0x49),u8(0x06),u8(0x24),u8(0x5c)
  845     db  u8(0xc2),u8(0xd3),u8(0xac),u8(0x62),u8(0x91),u8(0x95),u8(0xe4),u8(0x79)
  846     db  u8(0xe7),u8(0xc8),u8(0x37),u8(0x6d),u8(0x8d),u8(0xd5),u8(0x4e),u8(0xa9)
  847     db  u8(0x6c),u8(0x56),u8(0xf4),u8(0xea),u8(0x65),u8(0x7a),u8(0xae),u8(0x08)
  848     db  u8(0xba),u8(0x78),u8(0x25),u8(0x2e),u8(0x1c),u8(0xa6),u8(0xb4),u8(0xc6)
  849     db  u8(0xe8),u8(0xdd),u8(0x74),u8(0x1f),u8(0x4b),u8(0xbd),u8(0x8b),u8(0x8a)
  850     db  u8(0x70),u8(0x3e),u8(0xb5),u8(0x66),u8(0x48),u8(0x03),u8(0xf6),u8(0x0e)
  851     db  u8(0x61),u8(0x35),u8(0x57),u8(0xb9),u8(0x86),u8(0xc1),u8(0x1d),u8(0x9e)
  852     db  u8(0xe1),u8(0xf8),u8(0x98),u8(0x11),u8(0x69),u8(0xd9),u8(0x8e),u8(0x94)
  853     db  u8(0x9b),u8(0x1e),u8(0x87),u8(0xe9),u8(0xce),u8(0x55),u8(0x28),u8(0xdf)
  854     db  u8(0x8c),u8(0xa1),u8(0x89),u8(0x0d),u8(0xbf),u8(0xe6),u8(0x42),u8(0x68)
  855     db  u8(0x41),u8(0x99),u8(0x2d),u8(0x0f),u8(0xb0),u8(0x54),u8(0xbb),u8(0x16)
  856 
  857 %endif
  858 
  859 %ifdef  DECRYPTION
  860 
  861 ; %define DECRYPTION_TABLE
  862 
  863 %define dtab_0(x)   [_aes_dec_tab+  8*x]      ; table entries are 8 bytes apart; x must be a register holding the entry index
  864 %define dtab_1(x)   [_aes_dec_tab+3+8*x]      ; dtab_1..dtab_3 read dwords at entry offsets 3, 2 and 1
  865 %define dtab_2(x)   [_aes_dec_tab+2+8*x]
  866 %define dtab_3(x)   [_aes_dec_tab+1+8*x]
  867 %define dtab_x(x)   byte [_aes_dec_tab+7+8*x] ; single byte at entry offset 7, used by the last-round macros (presumably the inverse S-box value - the table layout is not visible in this file)
  868 
  869 %macro irn_fun 2
  870 ; %1 = XOR-lookup macro, %2 = load-lookup macro. In: eax,ebx,ecx,edx = columns 0-3, esi/edi = key words 2/3. Out: eax = column 0, ebp = column 1 (both unkeyed), esi/edi = columns 2/3 (keyed)
  871     rol eax,16                  ; al/ah now hold bytes 2 and 3 of column[0]
  872     %1      esi, cl, 0, ebp
  873     %1      esi, bh, 1, ebp
  874     %1      esi, al, 2, ebp
  875     %1      edi, dl, 0, ebp
  876     %1      edi, ch, 1, ebp
  877     %1      edi, ah, 3, ebp
  878     %2      ebp, bl, 0, ebp     ; start output column[1] from byte 0 of input column[1]
  879     shr     eax,16              ; eax = low word (bytes 0 and 1) of column[0]
  880     and     ebx,0xffff0000
  881     or      ebx,eax             ; ebx = high word of column[1] : low word of column[0]
  882     shr     ecx,16              ; cl/ch = bytes 2 and 3 of column[2]
  883     %1      ebp, bh, 1, eax
  884     %1      ebp, ch, 3, eax
  885     %2      eax, cl, 2, ecx     ; start output column[0]; ecx becomes scratch from here on
  886     %1      eax, bl, 0, ecx
  887     %1      eax, dh, 1, ecx
  888     shr     ebx,16              ; bl/bh = bytes 2 and 3 of column[1]
  889     shr     edx,16              ; dl/dh = bytes 2 and 3 of column[3]
  890     %1      esi, dh, 3, ecx
  891     %1      ebp, dl, 2, ecx
  892     %1      eax, bh, 3, ecx
  893     %1      edi, bl, 2, ecx
  894 
  895 %endmacro
  896 
  897 ; Basic MOV and XOR Operations for normal rounds
  898 
  899 %macro  ni_xor  4
  900     movzx   %4,%2               ; %4 (index register) = zero-extended source byte %2
  901     xor     %1,dtab_%3(%4)      ; XOR the dword read through accessor dtab_<%3> into %1
  902 %endmacro
  903 
  904 %macro  ni_mov  4
  905     movzx   %4,%2               ; %4 (index register) = zero-extended source byte %2
  906     mov     %1,dtab_%3(%4)      ; load the dword read through accessor dtab_<%3> into %1
  907 %endmacro
  908 
  909 ; Basic MOV and XOR Operations for last round
  910 
  911 %macro  li_xor  4
  912     movzx   %4,%2
  913     movzx   %4,dtab_x(%4)       ; single table byte for the source byte
  914 %if %3 != 0
  915     shl     %4,8*%3
  916 %endif
  917     xor     %1,%4               ; XOR the byte, moved to byte position %3, into %1
  918 %endmacro
  919 
  920 %macro  li_mov  4
  921     movzx   %4,%2
  922     movzx   %1,dtab_x(%4)       ; single table byte for the source byte
  923 %if %3 != 0
  924     shl     %1,8*%3             ; %1 = that byte at byte position %3
  925 %endif
  926 %endmacro
  927 
  928 %ifdef REDUCE_CODE_SIZE
  929 ; One normal decryption round. In: eax,ebx,ecx,edx = state columns 0-3, ebp = current round key. Out: same registers updated, ebp moved to the next round key
  930 dec_round:
  931     sub     sp, 2               ; 2-byte return address + these 2 bytes: slot 1 here is the caller's slot 0
  932 %ifdef AES_REV_DKS
  933     add     ebp,16
  934 %else
  935     sub     ebp,16              ; unreversed schedule: walk the round keys from the top down
  936 %endif
  937     save    1,ebp
  938     mov     esi,[ebp+8]         ; preload key words 2 and 3; irn_fun XORs columns 2 and 3 into them
  939     mov     edi,[ebp+12]
  940 
  941     irn_fun ni_xor, ni_mov
  942 
  943     mov     ebx,ebp             ; irn_fun leaves column 1 in ebp, column 0 in eax
  944     mov     ecx,esi
  945     mov     edx,edi
  946     restore ebp,1
  947     xor     eax,[ebp]           ; key words 0 and 1 are applied after the table lookups
  948     xor     ebx,[ebp+4]
  949     add     sp, 2
  950     ret
  951 
  952 %else
  953 ; inline form of the same round; it runs in the caller's frame so it uses slot 0 directly
  954 %macro dec_round 0
  955 
  956 %ifdef AES_REV_DKS
  957     add     ebp,16
  958 %else
  959     sub     ebp,16              ; unreversed schedule: walk the round keys from the top down
  960 %endif
  961     save    0,ebp
  962     mov     esi,[ebp+8]         ; preload key words 2 and 3
  963     mov     edi,[ebp+12]
  964 
  965     irn_fun ni_xor, ni_mov
  966 
  967     mov     ebx,ebp             ; irn_fun leaves column 1 in ebp, column 0 in eax
  968     mov     ecx,esi
  969     mov     edx,edi
  970     restore ebp,0
  971     xor     eax,[ebp]           ; key words 0 and 1 are applied after the table lookups
  972     xor     ebx,[ebp+4]
  973 
  974 %endmacro
  975 
  976 %endif
  977 
  978 %macro dec_last_round 0
  979 ; final decryption round, always expanded inline; leaves the four output columns in eax, ebx, esi, edi
  980 %ifdef AES_REV_DKS
  981     add     ebp,16
  982 %else
  983     sub     ebp,16              ; unreversed schedule: walk the round keys from the top down
  984 %endif
  985     save    0,ebp
  986     mov     esi,[ebp+8]         ; preload key words 2 and 3
  987     mov     edi,[ebp+12]
  988 
  989     irn_fun li_xor, li_mov
  990 
  991     mov     ebx,ebp             ; column 1 was built in ebp
  992     restore ebp,0
  993     xor     eax,[ebp]           ; apply key words 0 and 1
  994     xor     ebx,[ebp+4]
  995 
  996 %endmacro
  997 
  998     section _TEXT
  999 
 1000 ; AES Decryption Subroutine
 1001 ; Reads 16 bytes through the in_blk pointer, writes 16 bytes through the out_blk pointer and returns 0 in eax (-1 if the stored round count is not recognised)
 1002     do_name _aes_decrypt,12
 1003 
 1004     mov     ax, sp
 1005     movzx   esp, ax             ; clear the upper half of esp so [esp+..] addressing can be used
 1006 
 1007     sub     esp,stk_spc
 1008     mov     [esp+16],ebp        ; preserve the registers this routine promises to keep
 1009     mov     [esp+12],ebx
 1010     mov     [esp+ 8],esi
 1011     mov     [esp+ 4],edi
 1012 
 1013 ; input four columns and xor in first round key
 1014 
 1015     movzx   esi,word [esp+in_blk+stk_spc] ; input pointer
 1016     mov     eax,[esi   ]
 1017     mov     ebx,[esi+ 4]
 1018     mov     ecx,[esi+ 8]
 1019     mov     edx,[esi+12]
 1020     lea     esi,[esi+16]        ; NOTE(review): esi is reloaded by the first round (or restored at .5) before being read, so this appears to have no effect - confirm
 1021 
 1022     movzx   ebp, word [esp+ctx+stk_spc]    ; key pointer
 1023     movzx   edi,byte[ebp+4*KS_LENGTH]      ; 16 * number of rounds = byte offset of the last round key
 1024 %ifndef  AES_REV_DKS        ; if decryption key schedule is not reversed
 1025     lea     ebp,[ebp+edi] ; we have to access it from the top down
 1026 %endif
 1027     xor     eax,[ebp   ]  ; key schedule
 1028     xor     ebx,[ebp+ 4]
 1029     xor     ecx,[ebp+ 8]
 1030     xor     edx,[ebp+12]
 1031 
 1032 ; determine the number of rounds
 1033 ; NOTE(review): AES_VAR also defines AES_256, so a variable-key build would skip this dispatch and always run 14 rounds - confirm
 1034 %ifndef AES_256
 1035     cmp     edi,10*16
 1036     je      .3
 1037     cmp     edi,12*16
 1038     je      .2
 1039     cmp     edi,14*16
 1040     je      .1
 1041     mov     eax,-1
 1042     jmp     .5
 1043 %endif
 1044 ; entering at .1 runs 13 normal rounds, at .2 runs 11 and at .3 runs 9, each followed by the last round
 1045 .1: mf_call dec_round
 1046     mf_call dec_round
 1047 .2: mf_call dec_round
 1048     mf_call dec_round
 1049 .3: mf_call dec_round
 1050     mf_call dec_round
 1051     mf_call dec_round
 1052     mf_call dec_round
 1053     mf_call dec_round
 1054     mf_call dec_round
 1055     mf_call dec_round
 1056     mf_call dec_round
 1057     mf_call dec_round
 1058     dec_last_round
 1059 
 1060 ; move final values to the output array.
 1061 
 1062     movzx   ebp,word [esp+out_blk+stk_spc] ; output pointer
 1063     mov     [ebp],eax
 1064     mov     [ebp+4],ebx
 1065     mov     [ebp+8],esi
 1066     mov     [ebp+12],edi
 1067     xor     eax,eax             ; return 0
 1068 
 1069 .5: mov     ebp,[esp+16]        ; common exit: restore preserved registers
 1070     mov     ebx,[esp+12]
 1071     mov     esi,[esp+ 8]
 1072     mov     edi,[esp+ 4]
 1073     add     esp,stk_spc
 1074     do_exit 12
 1075 
 1076 %endif
 1077 
 1078 %ifdef REDUCE_CODE_SIZE
 1079 
 1080 inv_mix_col:
 1081     movzx   ecx,dl          ; input  eax, edx
 1082     movzx   ecx,etab_b(ecx) ; output eax
 1083     mov     eax,dtab_0(ecx) ; used   ecx
 1084     movzx   ecx,dh          ; byte 1 of the input word
 1085     shr     edx,16          ; bring bytes 2 and 3 down (edx is not preserved)
 1086     movzx   ecx,etab_b(ecx) ; map the byte through etab_b (table macro defined elsewhere)
 1087     xor     eax,dtab_1(ecx) ; fold in the dtab_1 entry
 1088     movzx   ecx,dl          ; byte 2 of the input word
 1089     movzx   ecx,etab_b(ecx)
 1090     xor     eax,dtab_2(ecx) ; fold in the dtab_2 entry
 1091     movzx   ecx,dh          ; byte 3 of the input word
 1092     movzx   ecx,etab_b(ecx)
 1093     xor     eax,dtab_3(ecx) ; fold in the dtab_3 entry
 1094     ret
 1095 
 1096 %else
 1097 
 1098 %macro  inv_mix_col 0
 1099 ; Inline form of the inverse mix column step; edx is destroyed by the shift below.
 1100     movzx   ecx,dl          ; input  eax, edx
 1101     movzx   ecx,etab_b(ecx) ; output eax
 1102     mov     eax,dtab_0(ecx) ; used   ecx
 1103     movzx   ecx,dh          ; byte 1 of the input word
 1104     shr     edx,16          ; bring bytes 2 and 3 down (edx is not preserved)
 1105     movzx   ecx,etab_b(ecx) ; map the byte through etab_b (table macro defined elsewhere)
 1106     xor     eax,dtab_1(ecx) ; fold in the dtab_1 entry
 1107     movzx   ecx,dl          ; byte 2 of the input word
 1108     movzx   ecx,etab_b(ecx)
 1109     xor     eax,dtab_2(ecx) ; fold in the dtab_2 entry
 1110     movzx   ecx,dh          ; byte 3 of the input word
 1111     movzx   ecx,etab_b(ecx)
 1112     xor     eax,dtab_3(ecx) ; fold in the dtab_3 entry
 1113 
 1114 %endmacro
 1115 
 1116 %endif
 1117 
 1118 %ifdef DECRYPTION_KEY_SCHEDULE
 1119 
 1120 %ifdef AES_128
 1121 
 1122 %ifndef DECRYPTION_TABLE
 1123 ; %define DECRYPTION_TABLE
 1124 %endif
 1125 
 1126     do_name _aes_decrypt_key128,8
 1127 ; Builds the 128-bit decryption key schedule in place; finishes through dec_end.
 1128     push    ebp             ; ebp, ebx, esi and edi are popped again in dec_end
 1129     push    ebx
 1130     push    esi
 1131     push    edi
 1132     mov     eax,[esp+24]    ; context
 1133     mov     edx,[esp+20]    ; key
 1134     push    eax             ; pass context and key on to the encryption key routine
 1135     push    edx
 1136     do_call _aes_encrypt_key128,8   ; generate expanded encryption key
 1137     mov     eax,10*16       ; byte offset of the last round key
 1138     mov     esi,[esp+24]    ; pointer to first round key
 1139     lea     edi,[esi+eax]   ; pointer to last round key
 1140     add     esi,32          ; esi now addresses the third round key
 1141                             ; the inverse mix column transformation
 1142     mov     edx,[esi-16]    ; needs to be applied to all round keys
 1143     mf_call inv_mix_col     ; except first and last. Hence start by
 1144     mov     [esi-16],eax    ; transforming the four sub-keys in the
 1145     mov     edx,[esi-12]    ; second round key
 1146     mf_call inv_mix_col
 1147     mov     [esi-12],eax    ; transformations for subsequent rounds
 1148     mov     edx,[esi-8]     ; can then be made more efficient by
 1149     mf_call inv_mix_col     ; noting that for three of the four sub-keys
 1150     mov     [esi-8],eax     ; in the encryption round key ek[r]:
 1151     mov     edx,[esi-4]     ;
 1152     mf_call inv_mix_col     ;   ek[r][n] = ek[r][n-1] ^ ek[r-1][n]
 1153     mov     [esi-4],eax     ;
 1154                             ; where n is 1..3. Hence the corresponding
 1155 .0: mov     edx,[esi]       ; subkeys in the decryption round key dk[r]
 1156     mf_call inv_mix_col     ; also obey since inv_mix_col is linear in
 1157     mov     [esi],eax       ; GF(256):
 1158     xor     eax,[esi-12]    ;
 1159     mov     [esi+4],eax     ;   dk[r][n] = dk[r][n-1] ^ dk[r-1][n]
 1160     xor     eax,[esi-8]     ;
 1161     mov     [esi+8],eax     ; So we only need one inverse mix column
 1162     xor     eax,[esi-4]     ; operation (n = 0) for each four word cycle
 1163     mov     [esi+12],eax    ; in the expanded key.
 1164     add     esi,16          ; advance to the next round key
 1165     cmp     edi,esi         ; both operands are addresses, so compare unsigned
 1166     ja      .0              ; loop until the last round key is reached
 1167     jmp     dec_end         ; shared key reversal and epilogue
 1168 
 1169 %endif
 1170 
 1171 %ifdef AES_192
 1172 
 1173 %ifndef DECRYPTION_TABLE
 1174 ; %define DECRYPTION_TABLE
 1175 %endif
 1176 
 1177     do_name _aes_decrypt_key192,8
 1178 ; Builds the 192-bit decryption key schedule in place; finishes through dec_end.
 1179     push    ebp             ; ebp, ebx, esi and edi are popped again in dec_end
 1180     push    ebx
 1181     push    esi
 1182     push    edi
 1183     mov     eax,[esp+24]    ; context
 1184     mov     edx,[esp+20]    ; key
 1185     push    eax             ; pass context and key on to the encryption key routine
 1186     push    edx
 1187     do_call _aes_encrypt_key192,8   ; generate expanded encryption key
 1188     mov     eax,12*16       ; byte offset of the last round key
 1189     mov     esi,[esp+24]    ; first round key
 1190     lea     edi,[esi+eax]   ; last round key
 1191     add     esi,48          ; the first 6 words are the key, of
 1192                             ; which the top 2 words are part of
 1193     mov     edx,[esi-32]    ; the second round key and hence
 1194     mf_call inv_mix_col     ; need to be modified. After this we
 1195     mov     [esi-32],eax    ; need to do a further six values prior
 1196     mov     edx,[esi-28]    ; to using a more efficient technique
 1197     mf_call inv_mix_col     ; based on:
 1198     mov     [esi-28],eax    ;
 1199                             ; dk[r][n] = dk[r][n-1] ^ dk[r-1][n]
 1200     mov     edx,[esi-24]    ;
 1201     mf_call inv_mix_col     ; for n = 1 .. 5 where the key expansion
 1202     mov     [esi-24],eax    ; cycle is now 6 words long
 1203     mov     edx,[esi-20]
 1204     mf_call inv_mix_col
 1205     mov     [esi-20],eax
 1206     mov     edx,[esi-16]
 1207     mf_call inv_mix_col
 1208     mov     [esi-16],eax
 1209     mov     edx,[esi-12]
 1210     mf_call inv_mix_col
 1211     mov     [esi-12],eax
 1212     mov     edx,[esi-8]
 1213     mf_call inv_mix_col
 1214     mov     [esi-8],eax
 1215     mov     edx,[esi-4]
 1216     mf_call inv_mix_col
 1217     mov     [esi-4],eax
 1218 
 1219 .0: mov     edx,[esi]       ; the expanded key is 13 * 4 = 52 32-bit words
 1220     mf_call inv_mix_col     ; of which 11 * 4 = 44 have to be modified
 1221     mov     [esi],eax       ; using inv_mix_col.  We have already done 8
 1222     xor     eax,[esi-20]    ; of these so 36 are left - hence we need
 1223     mov     [esi+4],eax     ; exactly 6 loops of six here
 1224     xor     eax,[esi-16]
 1225     mov     [esi+8],eax
 1226     xor     eax,[esi-12]
 1227     mov     [esi+12],eax
 1228     xor     eax,[esi-8]
 1229     mov     [esi+16],eax
 1230     xor     eax,[esi-4]
 1231     mov     [esi+20],eax
 1232     add     esi,24          ; advance by one six-word expansion cycle
 1233     cmp     edi,esi         ; both operands are addresses, so compare unsigned
 1234     ja      .0              ; loop until the last round key is reached
 1235     jmp     dec_end         ; shared key reversal and epilogue
 1236 
 1237 %endif
 1238 
 1239 %ifdef AES_256
 1240 
 1241 %ifndef DECRYPTION_TABLE
 1242 ; %define DECRYPTION_TABLE
 1243 %endif
 1244 
 1245     do_name _aes_decrypt_key256,8
 1246 ; Builds the 256-bit decryption key schedule in place, then falls through to dec_end.
 1247     mov     ax, sp          ; zero-extend sp into esp (ax is clobbered)
 1248     movzx   esp, ax
 1249     push    ebp             ; ebp, ebx, esi and edi are popped again in dec_end
 1250     push    ebx
 1251     push    esi
 1252     push    edi
 1253 
 1254     movzx   eax, word [esp+20] ; ks
 1255     movzx   edx, word [esp+18] ; key
 1256     push    ax              ; both arguments are passed on as 16-bit words
 1257     push    dx
 1258     do_call _aes_encrypt_key256,4   ; generate expanded encryption key
 1259     mov     eax,14*16       ; byte offset of the last round key
 1260     movzx   esi, word [esp+20] ; ks
 1261     lea     edi,[esi+eax]   ; pointer to last round key
 1262     add     esi,64          ; esi now addresses the fifth round key
 1263 
 1264     mov     edx,[esi-48]    ; the primary key is 8 words, of which
 1265     mf_call inv_mix_col     ; the top four require modification
 1266     mov     [esi-48],eax
 1267     mov     edx,[esi-44]
 1268     mf_call inv_mix_col
 1269     mov     [esi-44],eax
 1270     mov     edx,[esi-40]
 1271     mf_call inv_mix_col
 1272     mov     [esi-40],eax
 1273     mov     edx,[esi-36]
 1274     mf_call inv_mix_col
 1275     mov     [esi-36],eax
 1276 
 1277     mov     edx,[esi-32]    ; the encryption key expansion cycle is
 1278     mf_call inv_mix_col     ; now eight words long so we need to
 1279     mov     [esi-32],eax    ; start by doing one complete block
 1280     mov     edx,[esi-28]
 1281     mf_call inv_mix_col
 1282     mov     [esi-28],eax
 1283     mov     edx,[esi-24]
 1284     mf_call inv_mix_col
 1285     mov     [esi-24],eax
 1286     mov     edx,[esi-20]
 1287     mf_call inv_mix_col
 1288     mov     [esi-20],eax
 1289     mov     edx,[esi-16]
 1290     mf_call inv_mix_col
 1291     mov     [esi-16],eax
 1292     mov     edx,[esi-12]
 1293     mf_call inv_mix_col
 1294     mov     [esi-12],eax
 1295     mov     edx,[esi-8]
 1296     mf_call inv_mix_col
 1297     mov     [esi-8],eax
 1298     mov     edx,[esi-4]
 1299     mf_call inv_mix_col
 1300     mov     [esi-4],eax
 1301 
 1302 .0: mov     edx,[esi]       ; we can now speed up the remaining
 1303     mf_call inv_mix_col     ; rounds by using the technique
 1304     mov     [esi],eax       ; outlined earlier.  But note that
 1305     xor     eax,[esi-28]    ; there is one extra inverse mix
 1306     mov     [esi+4],eax     ; column operation as the 256 bit
 1307     xor     eax,[esi-24]    ; key has an extra non-linear step
 1308     mov     [esi+8],eax     ; for the midway element.
 1309     xor     eax,[esi-20]
 1310     mov     [esi+12],eax    ; the expanded key is 15 * 4 = 60
 1311     mov     edx,[esi+16]    ; 32-bit words of which 52 need to
 1312     mf_call inv_mix_col     ; be modified.  We have already done
 1313     mov     [esi+16],eax    ; 12 so 40 are left - which means
 1314     xor     eax,[esi-12]    ; that we need exactly 5 loops of 8
 1315     mov     [esi+20],eax
 1316     xor     eax,[esi-8]
 1317     mov     [esi+24],eax
 1318     xor     eax,[esi-4]
 1319     mov     [esi+28],eax
 1320     add     esi,32          ; advance by one eight-word expansion cycle
 1321     cmp     edi,esi         ; stop once esi reaches the last round key
 1322     jg      .0
 1323 
 1324 %endif
 1325 
 1326 dec_end:
 1327 ; Shared tail of the decryption key routines: optional round key reversal, then the epilogue.
 1328 %ifdef AES_REV_DKS
 1329 ; NOTE(review): a 16-bit word is read at [esp+20] here, but the AES_128/AES_192 entry points read 32-bit arguments at [esp+20]/[esp+24] - confirm the stack layout.
 1330     movzx   esi,word [esp+20]   ; this reverses the order of the
 1331 .1: mov     eax,[esi]           ; round keys if required
 1332     mov     ebx,[esi+4]
 1333     mov     ebp,[edi]           ; edi still points at the last round key on entry
 1334     mov     edx,[edi+4]
 1335     mov     [esi],ebp           ; swap words 0 and 1 of the two round keys
 1336     mov     [esi+4],edx
 1337     mov     [edi],eax
 1338     mov     [edi+4],ebx
 1339 
 1340     mov     eax,[esi+8]
 1341     mov     ebx,[esi+12]
 1342     mov     ebp,[edi+8]
 1343     mov     edx,[edi+12]
 1344     mov     [esi+8],ebp         ; swap words 2 and 3 of the two round keys
 1345     mov     [esi+12],edx
 1346     mov     [edi+8],eax
 1347     mov     [edi+12],ebx
 1348 
 1349     add     esi,16              ; move both pointers one round key towards the middle
 1350     sub     edi,16
 1351     cmp     edi,esi             ; stop once the pointers meet
 1352     jg      .1
 1353 
 1354 %endif
 1355 
 1356     pop     edi                 ; restore the registers pushed by the key routines
 1357     pop     esi
 1358     pop     ebx
 1359     pop     ebp
 1360     xor     eax,eax             ; return 0
 1361     do_exit  8
 1362 
 1363 %ifdef AES_VAR
 1364 
 1365     do_name _aes_decrypt_key,12
 1366 ; Dispatches on the length argument to the 128, 192 or 256 bit key routine; returns -1 for any other length.
 1367     mov     ecx,[esp+4]     ; first argument, passed on as the key
 1368     mov     eax,[esp+8]     ; second argument: key length
 1369     mov     edx,[esp+12]    ; third argument, passed on as the context
 1370     push    edx             ; set up the two arguments of the selected routine
 1371     push    ecx
 1372 
 1373     cmp     eax,16          ; each size is accepted as either of two values
 1374     je      .1
 1375     cmp     eax,128
 1376     je      .1
 1377 
 1378     cmp     eax,24
 1379     je      .2
 1380     cmp     eax,192
 1381     je      .2
 1382 
 1383     cmp     eax,32
 1384     je      .3
 1385     cmp     eax,256
 1386     je      .3
 1387     mov     eax,-1          ; unsupported length
 1388     add     esp,8           ; discard the two arguments pushed above
 1389     do_exit 12
 1390 
 1391 .1: do_call _aes_decrypt_key128,8
 1392     do_exit 12
 1393 .2: do_call _aes_decrypt_key192,8
 1394     do_exit 12
 1395 .3: do_call _aes_decrypt_key256,8
 1396     do_exit 12
 1397 
 1398 %endif
 1399 
 1400 %endif
 1401 
 1402 %ifdef DECRYPTION_TABLE
 1403 
 1404 ; Inverse S-box data - 256 entries, each expanded by v8() into 8 bytes (2048 bytes in total)
 1405 
 1406     section _DATA
 1407 
 1408 %define v8(x)   fe(x), f9(x), fd(x), fb(x), fe(x), f9(x), fd(x), x
 1409 
 1410 _aes_dec_tab:
 1411     db  v8(0x52),v8(0x09),v8(0x6a),v8(0xd5),v8(0x30),v8(0x36),v8(0xa5),v8(0x38)
 1412     db  v8(0xbf),v8(0x40),v8(0xa3),v8(0x9e),v8(0x81),v8(0xf3),v8(0xd7),v8(0xfb)
 1413     db  v8(0x7c),v8(0xe3),v8(0x39),v8(0x82),v8(0x9b),v8(0x2f),v8(0xff),v8(0x87)
 1414     db  v8(0x34),v8(0x8e),v8(0x43),v8(0x44),v8(0xc4),v8(0xde),v8(0xe9),v8(0xcb)
 1415     db  v8(0x54),v8(0x7b),v8(0x94),v8(0x32),v8(0xa6),v8(0xc2),v8(0x23),v8(0x3d)
 1416     db  v8(0xee),v8(0x4c),v8(0x95),v8(0x0b),v8(0x42),v8(0xfa),v8(0xc3),v8(0x4e)
 1417     db  v8(0x08),v8(0x2e),v8(0xa1),v8(0x66),v8(0x28),v8(0xd9),v8(0x24),v8(0xb2)
 1418     db  v8(0x76),v8(0x5b),v8(0xa2),v8(0x49),v8(0x6d),v8(0x8b),v8(0xd1),v8(0x25)
 1419     db  v8(0x72),v8(0xf8),v8(0xf6),v8(0x64),v8(0x86),v8(0x68),v8(0x98),v8(0x16)
 1420     db  v8(0xd4),v8(0xa4),v8(0x5c),v8(0xcc),v8(0x5d),v8(0x65),v8(0xb6),v8(0x92)
 1421     db  v8(0x6c),v8(0x70),v8(0x48),v8(0x50),v8(0xfd),v8(0xed),v8(0xb9),v8(0xda)
 1422     db  v8(0x5e),v8(0x15),v8(0x46),v8(0x57),v8(0xa7),v8(0x8d),v8(0x9d),v8(0x84)
 1423     db  v8(0x90),v8(0xd8),v8(0xab),v8(0x00),v8(0x8c),v8(0xbc),v8(0xd3),v8(0x0a)
 1424     db  v8(0xf7),v8(0xe4),v8(0x58),v8(0x05),v8(0xb8),v8(0xb3),v8(0x45),v8(0x06)
 1425     db  v8(0xd0),v8(0x2c),v8(0x1e),v8(0x8f),v8(0xca),v8(0x3f),v8(0x0f),v8(0x02)
 1426     db  v8(0xc1),v8(0xaf),v8(0xbd),v8(0x03),v8(0x01),v8(0x13),v8(0x8a),v8(0x6b)
 1427     db  v8(0x3a),v8(0x91),v8(0x11),v8(0x41),v8(0x4f),v8(0x67),v8(0xdc),v8(0xea)
 1428     db  v8(0x97),v8(0xf2),v8(0xcf),v8(0xce),v8(0xf0),v8(0xb4),v8(0xe6),v8(0x73)
 1429     db  v8(0x96),v8(0xac),v8(0x74),v8(0x22),v8(0xe7),v8(0xad),v8(0x35),v8(0x85)
 1430     db  v8(0xe2),v8(0xf9),v8(0x37),v8(0xe8),v8(0x1c),v8(0x75),v8(0xdf),v8(0x6e)
 1431     db  v8(0x47),v8(0xf1),v8(0x1a),v8(0x71),v8(0x1d),v8(0x29),v8(0xc5),v8(0x89)
 1432     db  v8(0x6f),v8(0xb7),v8(0x62),v8(0x0e),v8(0xaa),v8(0x18),v8(0xbe),v8(0x1b)
 1433     db  v8(0xfc),v8(0x56),v8(0x3e),v8(0x4b),v8(0xc6),v8(0xd2),v8(0x79),v8(0x20)
 1434     db  v8(0x9a),v8(0xdb),v8(0xc0),v8(0xfe),v8(0x78),v8(0xcd),v8(0x5a),v8(0xf4)
 1435     db  v8(0x1f),v8(0xdd),v8(0xa8),v8(0x33),v8(0x88),v8(0x07),v8(0xc7),v8(0x31)
 1436     db  v8(0xb1),v8(0x12),v8(0x10),v8(0x59),v8(0x27),v8(0x80),v8(0xec),v8(0x5f)
 1437     db  v8(0x60),v8(0x51),v8(0x7f),v8(0xa9),v8(0x19),v8(0xb5),v8(0x4a),v8(0x0d)
 1438     db  v8(0x2d),v8(0xe5),v8(0x7a),v8(0x9f),v8(0x93),v8(0xc9),v8(0x9c),v8(0xef)
 1439     db  v8(0xa0),v8(0xe0),v8(0x3b),v8(0x4d),v8(0xae),v8(0x2a),v8(0xf5),v8(0xb0)
 1440     db  v8(0xc8),v8(0xeb),v8(0xbb),v8(0x3c),v8(0x83),v8(0x53),v8(0x99),v8(0x61)
 1441     db  v8(0x17),v8(0x2b),v8(0x04),v8(0x7e),v8(0xba),v8(0x77),v8(0xd6),v8(0x26)
 1442     db  v8(0xe1),v8(0x69),v8(0x14),v8(0x63),v8(0x55),v8(0x21),v8(0x0c),v8(0x7d)
 1443 
 1444 %endif