"Fossies" - the Fresh Open Source Software Archive

Member "openssl-1.0.2q/crypto/aes/asm/aesp8-ppc.pl" (20 Nov 2018, 48154 Bytes) of package /linux/misc/openssl-1.0.2q.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Perl source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can view or download the uninterpreted source code file here. For more information about "aesp8-ppc.pl" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 1.1.1-pre3_vs_1.1.1-pre8.

    1 #!/usr/bin/env perl
    2 #
    3 # ====================================================================
    4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
    5 # project. The module is, however, dual licensed under OpenSSL and
    6 # CRYPTOGAMS licenses depending on where you obtain it. For further
    7 # details see http://www.openssl.org/~appro/cryptogams/.
    8 # ====================================================================
    9 #
   10 # This module implements support for AES instructions as per PowerISA
   11 # specification version 2.07, first implemented by POWER8 processor.
   12 # The module is endian-agnostic in sense that it supports both big-
   13 # and little-endian cases. Data alignment in parallelizable modes is
   14 # handled with VSX loads and stores, which implies MSR.VSX flag being
   15 # set. It should also be noted that ISA specification doesn't prohibit
   16 # alignment exceptions for these instructions on page boundaries.
   17 # Initially alignment was handled in pure AltiVec/VMX way [when data
   18 # is aligned programmatically, which in turn guarantees exception-
   19 # free execution], but it turned to hamper performance when vcipher
   20 # instructions are interleaved. It's reckoned that eventual
   21 # misalignment penalties at page boundaries are in average lower
   22 # than additional overhead in pure AltiVec approach.
   23 
   24 $flavour = shift;
   25 
   26 if ($flavour =~ /64/) {
   27     $SIZE_T =8;
   28     $LRSAVE =2*$SIZE_T;
   29     $STU    ="stdu";
   30     $POP    ="ld";
   31     $PUSH   ="std";
   32     $UCMP   ="cmpld";
   33     $SHL    ="sldi";
   34 } elsif ($flavour =~ /32/) {
   35     $SIZE_T =4;
   36     $LRSAVE =$SIZE_T;
   37     $STU    ="stwu";
   38     $POP    ="lwz";
   39     $PUSH   ="stw";
   40     $UCMP   ="cmplw";
   41     $SHL    ="slwi";
   42 } else { die "nonsense $flavour"; }
   43 
   44 $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
   45 
   46 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
   47 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
   48 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
   49 die "can't locate ppc-xlate.pl";
   50 
   51 open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
   52 
   53 $FRAME=8*$SIZE_T;
   54 $prefix="aes_p8";
   55 
   56 $sp="r1";
   57 $vrsave="r12";
   58 
   59 #########################################################################
{{{ # Key setup procedures                      #
# Emits .aes_p8_set_encrypt_key and .aes_p8_set_decrypt_key.
# GPR aliases r3..r8 = (inp,bits,out,ptr,cnt,rounds); per the abort
# paths below ("mr r3,$ptr"), r3 also carries the return value
# (0 on success, -1 for NULL pointers, -2 for an unsupported bit
# length).  VR aliases: v0..v6 are the key-expansion working set,
# v7..v11 ($stage,$outperm,$outmask,$outhead,$outtail) implement the
# store pipeline that handles a possibly unaligned output buffer.
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

# NOTE(review): the heredoc below is PPC assembly that is piped through
# ppc-xlate.pl (see the "open STDOUT" at the top of the file); the '?'
# and 'le?' mnemonic prefixes appear to be conditional/endian expansion
# markers handled by that script — confirm against ppc-xlate.pl before
# editing.  Instruction order here is deliberate (interleaved for
# scheduling); do not reorder.
$code.=<<___;
.machine    "any"

.text

.align  7
rcon:
.long   0x01000000, 0x01000000, 0x01000000, 0x01000000  ?rev
.long   0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000  ?rev
.long   0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c  ?rev
.long   0,0,0,0                     ?asis
Lconsts:
    mflr    r0
    bcl 20,31,\$+4
    mflr    $ptr     #vvvvv "distance between . and rcon
    addi    $ptr,$ptr,-0x48
    mtlr    r0
    blr
    .long   0
    .byte   0,12,0x14,0,0,0,0,0
.asciz  "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl  .${prefix}_set_encrypt_key
.align  5
.${prefix}_set_encrypt_key:
Lset_encrypt_key:
    mflr        r11
    $PUSH       r11,$LRSAVE($sp)

    li      $ptr,-1
    ${UCMP}i    $inp,0
    beq-        Lenc_key_abort      # if ($inp==0) return -1;
    ${UCMP}i    $out,0
    beq-        Lenc_key_abort      # if ($out==0) return -1;
    li      $ptr,-2
    cmpwi       $bits,128
    blt-        Lenc_key_abort
    cmpwi       $bits,256
    bgt-        Lenc_key_abort
    andi.       r0,$bits,0x3f
    bne-        Lenc_key_abort

    lis     r0,0xfff0
    mfspr       $vrsave,256
    mtspr       256,r0

    bl      Lconsts
    mtlr        r11

    neg     r9,$inp
    lvx     $in0,0,$inp
    addi        $inp,$inp,15        # 15 is not typo
    lvsr        $key,0,r9       # borrow $key
    li      r8,0x20
    cmpwi       $bits,192
    lvx     $in1,0,$inp
    le?vspltisb $mask,0x0f      # borrow $mask
    lvx     $rcon,0,$ptr
    le?vxor     $key,$key,$mask     # adjust for byte swap
    lvx     $mask,r8,$ptr
    addi        $ptr,$ptr,0x10
    vperm       $in0,$in0,$in1,$key # align [and byte swap in LE]
    li      $cnt,8
    vxor        $zero,$zero,$zero
    mtctr       $cnt

    ?lvsr       $outperm,0,$out
    vspltisb    $outmask,-1
    lvx     $outhead,0,$out
    ?vperm      $outmask,$zero,$outmask,$outperm

    blt     Loop128
    addi        $inp,$inp,8
    beq     L192
    addi        $inp,$inp,8
    b       L256

.align  4
Loop128:
    vperm       $key,$in0,$in0,$mask    # rotate-n-splat
    vsldoi      $tmp,$zero,$in0,12  # >>32
     vperm      $outtail,$in0,$in0,$outperm # rotate
     vsel       $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
    vcipherlast $key,$key,$rcon
     stvx       $stage,0,$out
     addi       $out,$out,16

    vxor        $in0,$in0,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in0,$in0,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in0,$in0,$tmp
     vadduwm    $rcon,$rcon,$rcon
    vxor        $in0,$in0,$key
    bdnz        Loop128

    lvx     $rcon,0,$ptr        # last two round keys

    vperm       $key,$in0,$in0,$mask    # rotate-n-splat
    vsldoi      $tmp,$zero,$in0,12  # >>32
     vperm      $outtail,$in0,$in0,$outperm # rotate
     vsel       $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
    vcipherlast $key,$key,$rcon
     stvx       $stage,0,$out
     addi       $out,$out,16

    vxor        $in0,$in0,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in0,$in0,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in0,$in0,$tmp
     vadduwm    $rcon,$rcon,$rcon
    vxor        $in0,$in0,$key

    vperm       $key,$in0,$in0,$mask    # rotate-n-splat
    vsldoi      $tmp,$zero,$in0,12  # >>32
     vperm      $outtail,$in0,$in0,$outperm # rotate
     vsel       $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
    vcipherlast $key,$key,$rcon
     stvx       $stage,0,$out
     addi       $out,$out,16

    vxor        $in0,$in0,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in0,$in0,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in0,$in0,$tmp
    vxor        $in0,$in0,$key
     vperm      $outtail,$in0,$in0,$outperm # rotate
     vsel       $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
     stvx       $stage,0,$out

    addi        $inp,$out,15        # 15 is not typo
    addi        $out,$out,0x50

    li      $rounds,10
    b       Ldone

.align  4
L192:
    lvx     $tmp,0,$inp
    li      $cnt,4
     vperm      $outtail,$in0,$in0,$outperm # rotate
     vsel       $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
     stvx       $stage,0,$out
     addi       $out,$out,16
    vperm       $in1,$in1,$tmp,$key # align [and byte swap in LE]
    vspltisb    $key,8          # borrow $key
    mtctr       $cnt
    vsububm     $mask,$mask,$key    # adjust the mask

Loop192:
    vperm       $key,$in1,$in1,$mask    # roate-n-splat
    vsldoi      $tmp,$zero,$in0,12  # >>32
    vcipherlast $key,$key,$rcon

    vxor        $in0,$in0,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in0,$in0,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in0,$in0,$tmp

     vsldoi     $stage,$zero,$in1,8
    vspltw      $tmp,$in0,3
    vxor        $tmp,$tmp,$in1
    vsldoi      $in1,$zero,$in1,12  # >>32
     vadduwm    $rcon,$rcon,$rcon
    vxor        $in1,$in1,$tmp
    vxor        $in0,$in0,$key
    vxor        $in1,$in1,$key
     vsldoi     $stage,$stage,$in0,8

    vperm       $key,$in1,$in1,$mask    # rotate-n-splat
    vsldoi      $tmp,$zero,$in0,12  # >>32
     vperm      $outtail,$stage,$stage,$outperm # rotate
     vsel       $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
    vcipherlast $key,$key,$rcon
     stvx       $stage,0,$out
     addi       $out,$out,16

     vsldoi     $stage,$in0,$in1,8
    vxor        $in0,$in0,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
     vperm      $outtail,$stage,$stage,$outperm # rotate
     vsel       $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
    vxor        $in0,$in0,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in0,$in0,$tmp
     stvx       $stage,0,$out
     addi       $out,$out,16

    vspltw      $tmp,$in0,3
    vxor        $tmp,$tmp,$in1
    vsldoi      $in1,$zero,$in1,12  # >>32
     vadduwm    $rcon,$rcon,$rcon
    vxor        $in1,$in1,$tmp
    vxor        $in0,$in0,$key
    vxor        $in1,$in1,$key
     vperm      $outtail,$in0,$in0,$outperm # rotate
     vsel       $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
     stvx       $stage,0,$out
     addi       $inp,$out,15        # 15 is not typo
     addi       $out,$out,16
    bdnz        Loop192

    li      $rounds,12
    addi        $out,$out,0x20
    b       Ldone

.align  4
L256:
    lvx     $tmp,0,$inp
    li      $cnt,7
    li      $rounds,14
     vperm      $outtail,$in0,$in0,$outperm # rotate
     vsel       $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
     stvx       $stage,0,$out
     addi       $out,$out,16
    vperm       $in1,$in1,$tmp,$key # align [and byte swap in LE]
    mtctr       $cnt

Loop256:
    vperm       $key,$in1,$in1,$mask    # rotate-n-splat
    vsldoi      $tmp,$zero,$in0,12  # >>32
     vperm      $outtail,$in1,$in1,$outperm # rotate
     vsel       $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
    vcipherlast $key,$key,$rcon
     stvx       $stage,0,$out
     addi       $out,$out,16

    vxor        $in0,$in0,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in0,$in0,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in0,$in0,$tmp
     vadduwm    $rcon,$rcon,$rcon
    vxor        $in0,$in0,$key
     vperm      $outtail,$in0,$in0,$outperm # rotate
     vsel       $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
     stvx       $stage,0,$out
     addi       $inp,$out,15        # 15 is not typo
     addi       $out,$out,16
    bdz     Ldone

    vspltw      $key,$in0,3     # just splat
    vsldoi      $tmp,$zero,$in1,12  # >>32
    vsbox       $key,$key

    vxor        $in1,$in1,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in1,$in1,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in1,$in1,$tmp

    vxor        $in1,$in1,$key
    b       Loop256

.align  4
Ldone:
    lvx     $in1,0,$inp     # redundant in aligned case
    vsel        $in1,$outhead,$in1,$outmask
    stvx        $in1,0,$inp
    li      $ptr,0
    mtspr       256,$vrsave
    stw     $rounds,0($out)

Lenc_key_abort:
    mr      r3,$ptr
    blr
    .long       0
    .byte       0,12,0x14,1,0,0,3,0
    .long       0
.size   .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl  .${prefix}_set_decrypt_key
.align  5
.${prefix}_set_decrypt_key:
    $STU        $sp,-$FRAME($sp)
    mflr        r10
    $PUSH       r10,$FRAME+$LRSAVE($sp)
    bl      Lset_encrypt_key
    mtlr        r10

    cmpwi       r3,0
    bne-        Ldec_key_abort

    slwi        $cnt,$rounds,4
    subi        $inp,$out,240       # first round key
    srwi        $rounds,$rounds,1
    add     $out,$inp,$cnt      # last round key
    mtctr       $rounds

Ldeckey:
    lwz     r0, 0($inp)
    lwz     r6, 4($inp)
    lwz     r7, 8($inp)
    lwz     r8, 12($inp)
    addi        $inp,$inp,16
    lwz     r9, 0($out)
    lwz     r10,4($out)
    lwz     r11,8($out)
    lwz     r12,12($out)
    stw     r0, 0($out)
    stw     r6, 4($out)
    stw     r7, 8($out)
    stw     r8, 12($out)
    subi        $out,$out,16
    stw     r9, -16($inp)
    stw     r10,-12($inp)
    stw     r11,-8($inp)
    stw     r12,-4($inp)
    bdnz        Ldeckey

    xor     r3,r3,r3        # return value
Ldec_key_abort:
    addi        $sp,$sp,$FRAME
    blr
    .long       0
    .byte       0,12,4,1,0x80,0,3,0
    .long       0
.size   .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
  399 #########################################################################
  400 {{{ # Single block en- and decrypt procedures           #
  401 sub gen_block () {
  402 my $dir = shift;
  403 my $n   = $dir eq "de" ? "n" : "";
  404 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
  405 
  406 $code.=<<___;
  407 .globl  .${prefix}_${dir}crypt
  408 .align  5
  409 .${prefix}_${dir}crypt:
  410     lwz     $rounds,240($key)
  411     lis     r0,0xfc00
  412     mfspr       $vrsave,256
  413     li      $idx,15         # 15 is not typo
  414     mtspr       256,r0
  415 
  416     lvx     v0,0,$inp
  417     neg     r11,$out
  418     lvx     v1,$idx,$inp
  419     lvsl        v2,0,$inp       # inpperm
  420     le?vspltisb v4,0x0f
  421     ?lvsl       v3,0,r11        # outperm
  422     le?vxor     v2,v2,v4
  423     li      $idx,16
  424     vperm       v0,v0,v1,v2     # align [and byte swap in LE]
  425     lvx     v1,0,$key
  426     ?lvsl       v5,0,$key       # keyperm
  427     srwi        $rounds,$rounds,1
  428     lvx     v2,$idx,$key
  429     addi        $idx,$idx,16
  430     subi        $rounds,$rounds,1
  431     ?vperm      v1,v1,v2,v5     # align round key
  432 
  433     vxor        v0,v0,v1
  434     lvx     v1,$idx,$key
  435     addi        $idx,$idx,16
  436     mtctr       $rounds
  437 
  438 Loop_${dir}c:
  439     ?vperm      v2,v2,v1,v5
  440     v${n}cipher v0,v0,v2
  441     lvx     v2,$idx,$key
  442     addi        $idx,$idx,16
  443     ?vperm      v1,v1,v2,v5
  444     v${n}cipher v0,v0,v1
  445     lvx     v1,$idx,$key
  446     addi        $idx,$idx,16
  447     bdnz        Loop_${dir}c
  448 
  449     ?vperm      v2,v2,v1,v5
  450     v${n}cipher v0,v0,v2
  451     lvx     v2,$idx,$key
  452     ?vperm      v1,v1,v2,v5
  453     v${n}cipherlast v0,v0,v1
  454 
  455     vspltisb    v2,-1
  456     vxor        v1,v1,v1
  457     li      $idx,15         # 15 is not typo
  458     ?vperm      v2,v1,v2,v3     # outmask
  459     le?vxor     v3,v3,v4
  460     lvx     v1,0,$out       # outhead
  461     vperm       v0,v0,v0,v3     # rotate [and byte swap in LE]
  462     vsel        v1,v1,v0,v2
  463     lvx     v4,$idx,$out
  464     stvx        v1,0,$out
  465     vsel        v0,v0,v4,v2
  466     stvx        v0,$idx,$out
  467 
  468     mtspr       256,$vrsave
  469     blr
  470     .long       0
  471     .byte       0,12,0x14,0,0,0,3,0
  472     .long       0
  473 .size   .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
  474 ___
  475 }
  476 &gen_block("en");
  477 &gen_block("de");
  478 }}}
  479 #########################################################################
  480 {{{ # CBC en- and decrypt procedures                #
  481 my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
  482 my ($rndkey0,$rndkey1,$inout,$tmp)=     map("v$_",(0..3));
  483 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
  484                         map("v$_",(4..10));
  485 $code.=<<___;
  486 .globl  .${prefix}_cbc_encrypt
  487 .align  5
  488 .${prefix}_cbc_encrypt:
  489     ${UCMP}i    $len,16
  490     bltlr-
  491 
  492     cmpwi       $enc,0          # test direction
  493     lis     r0,0xffe0
  494     mfspr       $vrsave,256
  495     mtspr       256,r0
  496 
  497     li      $idx,15
  498     vxor        $rndkey0,$rndkey0,$rndkey0
  499     le?vspltisb $tmp,0x0f
  500 
  501     lvx     $ivec,0,$ivp        # load [unaligned] iv
  502     lvsl        $inpperm,0,$ivp
  503     lvx     $inptail,$idx,$ivp
  504     le?vxor     $inpperm,$inpperm,$tmp
  505     vperm       $ivec,$ivec,$inptail,$inpperm
  506 
  507     neg     r11,$inp
  508     ?lvsl       $keyperm,0,$key     # prepare for unaligned key
  509     lwz     $rounds,240($key)
  510 
  511     lvsr        $inpperm,0,r11      # prepare for unaligned load
  512     lvx     $inptail,0,$inp
  513     addi        $inp,$inp,15        # 15 is not typo
  514     le?vxor     $inpperm,$inpperm,$tmp
  515 
  516     ?lvsr       $outperm,0,$out     # prepare for unaligned store
  517     vspltisb    $outmask,-1
  518     lvx     $outhead,0,$out
  519     ?vperm      $outmask,$rndkey0,$outmask,$outperm
  520     le?vxor     $outperm,$outperm,$tmp
  521 
  522     srwi        $rounds,$rounds,1
  523     li      $idx,16
  524     subi        $rounds,$rounds,1
  525     beq     Lcbc_dec
  526 
  527 Lcbc_enc:
  528     vmr     $inout,$inptail
  529     lvx     $inptail,0,$inp
  530     addi        $inp,$inp,16
  531     mtctr       $rounds
  532     subi        $len,$len,16        # len-=16
  533 
  534     lvx     $rndkey0,0,$key
  535      vperm      $inout,$inout,$inptail,$inpperm
  536     lvx     $rndkey1,$idx,$key
  537     addi        $idx,$idx,16
  538     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
  539     vxor        $inout,$inout,$rndkey0
  540     lvx     $rndkey0,$idx,$key
  541     addi        $idx,$idx,16
  542     vxor        $inout,$inout,$ivec
  543 
  544 Loop_cbc_enc:
  545     ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
  546     vcipher     $inout,$inout,$rndkey1
  547     lvx     $rndkey1,$idx,$key
  548     addi        $idx,$idx,16
  549     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
  550     vcipher     $inout,$inout,$rndkey0
  551     lvx     $rndkey0,$idx,$key
  552     addi        $idx,$idx,16
  553     bdnz        Loop_cbc_enc
  554 
  555     ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
  556     vcipher     $inout,$inout,$rndkey1
  557     lvx     $rndkey1,$idx,$key
  558     li      $idx,16
  559     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
  560     vcipherlast $ivec,$inout,$rndkey0
  561     ${UCMP}i    $len,16
  562 
  563     vperm       $tmp,$ivec,$ivec,$outperm
  564     vsel        $inout,$outhead,$tmp,$outmask
  565     vmr     $outhead,$tmp
  566     stvx        $inout,0,$out
  567     addi        $out,$out,16
  568     bge     Lcbc_enc
  569 
  570     b       Lcbc_done
  571 
  572 .align  4
  573 Lcbc_dec:
  574     ${UCMP}i    $len,128
  575     bge     _aesp8_cbc_decrypt8x
  576     vmr     $tmp,$inptail
  577     lvx     $inptail,0,$inp
  578     addi        $inp,$inp,16
  579     mtctr       $rounds
  580     subi        $len,$len,16        # len-=16
  581 
  582     lvx     $rndkey0,0,$key
  583      vperm      $tmp,$tmp,$inptail,$inpperm
  584     lvx     $rndkey1,$idx,$key
  585     addi        $idx,$idx,16
  586     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
  587     vxor        $inout,$tmp,$rndkey0
  588     lvx     $rndkey0,$idx,$key
  589     addi        $idx,$idx,16
  590 
  591 Loop_cbc_dec:
  592     ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
  593     vncipher    $inout,$inout,$rndkey1
  594     lvx     $rndkey1,$idx,$key
  595     addi        $idx,$idx,16
  596     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
  597     vncipher    $inout,$inout,$rndkey0
  598     lvx     $rndkey0,$idx,$key
  599     addi        $idx,$idx,16
  600     bdnz        Loop_cbc_dec
  601 
  602     ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
  603     vncipher    $inout,$inout,$rndkey1
  604     lvx     $rndkey1,$idx,$key
  605     li      $idx,16
  606     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
  607     vncipherlast    $inout,$inout,$rndkey0
  608     ${UCMP}i    $len,16
  609 
  610     vxor        $inout,$inout,$ivec
  611     vmr     $ivec,$tmp
  612     vperm       $tmp,$inout,$inout,$outperm
  613     vsel        $inout,$outhead,$tmp,$outmask
  614     vmr     $outhead,$tmp
  615     stvx        $inout,0,$out
  616     addi        $out,$out,16
  617     bge     Lcbc_dec
  618 
  619 Lcbc_done:
  620     addi        $out,$out,-1
  621     lvx     $inout,0,$out       # redundant in aligned case
  622     vsel        $inout,$outhead,$inout,$outmask
  623     stvx        $inout,0,$out
  624 
  625     neg     $enc,$ivp       # write [unaligned] iv
  626     li      $idx,15         # 15 is not typo
  627     vxor        $rndkey0,$rndkey0,$rndkey0
  628     vspltisb    $outmask,-1
  629     le?vspltisb $tmp,0x0f
  630     ?lvsl       $outperm,0,$enc
  631     ?vperm      $outmask,$rndkey0,$outmask,$outperm
  632     le?vxor     $outperm,$outperm,$tmp
  633     lvx     $outhead,0,$ivp
  634     vperm       $ivec,$ivec,$ivec,$outperm
  635     vsel        $inout,$outhead,$ivec,$outmask
  636     lvx     $inptail,$idx,$ivp
  637     stvx        $inout,0,$ivp
  638     vsel        $inout,$ivec,$inptail,$outmask
  639     stvx        $inout,$idx,$ivp
  640 
  641     mtspr       256,$vrsave
  642     blr
  643     .long       0
  644     .byte       0,12,0x14,0,0,0,6,0
  645     .long       0
  646 ___
  647 #########################################################################
  648 {{  # Optimized CBC decrypt procedure               #
  649 my $key_="r11";
  650 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
  651     $x00=0 if ($flavour =~ /osx/);
  652 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
  653 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
  654 my $rndkey0="v23";  # v24-v25 rotating buffer for first found keys
  655             # v26-v31 last 6 round keys
  656 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
  657 
  658 $code.=<<___;
  659 .align  5
  660 _aesp8_cbc_decrypt8x:
  661     $STU        $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
  662     li      r10,`$FRAME+8*16+15`
  663     li      r11,`$FRAME+8*16+31`
  664     stvx        v20,r10,$sp     # ABI says so
  665     addi        r10,r10,32
  666     stvx        v21,r11,$sp
  667     addi        r11,r11,32
  668     stvx        v22,r10,$sp
  669     addi        r10,r10,32
  670     stvx        v23,r11,$sp
  671     addi        r11,r11,32
  672     stvx        v24,r10,$sp
  673     addi        r10,r10,32
  674     stvx        v25,r11,$sp
  675     addi        r11,r11,32
  676     stvx        v26,r10,$sp
  677     addi        r10,r10,32
  678     stvx        v27,r11,$sp
  679     addi        r11,r11,32
  680     stvx        v28,r10,$sp
  681     addi        r10,r10,32
  682     stvx        v29,r11,$sp
  683     addi        r11,r11,32
  684     stvx        v30,r10,$sp
  685     stvx        v31,r11,$sp
  686     li      r0,-1
  687     stw     $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
  688     li      $x10,0x10
  689     $PUSH       r26,`$FRAME+21*16+0*$SIZE_T`($sp)
  690     li      $x20,0x20
  691     $PUSH       r27,`$FRAME+21*16+1*$SIZE_T`($sp)
  692     li      $x30,0x30
  693     $PUSH       r28,`$FRAME+21*16+2*$SIZE_T`($sp)
  694     li      $x40,0x40
  695     $PUSH       r29,`$FRAME+21*16+3*$SIZE_T`($sp)
  696     li      $x50,0x50
  697     $PUSH       r30,`$FRAME+21*16+4*$SIZE_T`($sp)
  698     li      $x60,0x60
  699     $PUSH       r31,`$FRAME+21*16+5*$SIZE_T`($sp)
  700     li      $x70,0x70
  701     mtspr       256,r0
  702 
  703     subi        $rounds,$rounds,3   # -4 in total
  704     subi        $len,$len,128       # bias
  705 
  706     lvx     $rndkey0,$x00,$key  # load key schedule
  707     lvx     v30,$x10,$key
  708     addi        $key,$key,0x20
  709     lvx     v31,$x00,$key
  710     ?vperm      $rndkey0,$rndkey0,v30,$keyperm
  711     addi        $key_,$sp,$FRAME+15
  712     mtctr       $rounds
  713 
  714 Load_cbc_dec_key:
  715     ?vperm      v24,v30,v31,$keyperm
  716     lvx     v30,$x10,$key
  717     addi        $key,$key,0x20
  718     stvx        v24,$x00,$key_      # off-load round[1]
  719     ?vperm      v25,v31,v30,$keyperm
  720     lvx     v31,$x00,$key
  721     stvx        v25,$x10,$key_      # off-load round[2]
  722     addi        $key_,$key_,0x20
  723     bdnz        Load_cbc_dec_key
  724 
  725     lvx     v26,$x10,$key
  726     ?vperm      v24,v30,v31,$keyperm
  727     lvx     v27,$x20,$key
  728     stvx        v24,$x00,$key_      # off-load round[3]
  729     ?vperm      v25,v31,v26,$keyperm
  730     lvx     v28,$x30,$key
  731     stvx        v25,$x10,$key_      # off-load round[4]
  732     addi        $key_,$sp,$FRAME+15 # rewind $key_
  733     ?vperm      v26,v26,v27,$keyperm
  734     lvx     v29,$x40,$key
  735     ?vperm      v27,v27,v28,$keyperm
  736     lvx     v30,$x50,$key
  737     ?vperm      v28,v28,v29,$keyperm
  738     lvx     v31,$x60,$key
  739     ?vperm      v29,v29,v30,$keyperm
  740     lvx     $out0,$x70,$key     # borrow $out0
  741     ?vperm      v30,v30,v31,$keyperm
  742     lvx     v24,$x00,$key_      # pre-load round[1]
  743     ?vperm      v31,v31,$out0,$keyperm
  744     lvx     v25,$x10,$key_      # pre-load round[2]
  745 
  746     #lvx        $inptail,0,$inp     # "caller" already did this
  747     #addi       $inp,$inp,15        # 15 is not typo
  748     subi        $inp,$inp,15        # undo "caller"
  749 
  750      le?li      $idx,8
  751     lvx_u       $in0,$x00,$inp      # load first 8 "words"
  752      le?lvsl    $inpperm,0,$idx
  753      le?vspltisb    $tmp,0x0f
  754     lvx_u       $in1,$x10,$inp
  755      le?vxor    $inpperm,$inpperm,$tmp  # transform for lvx_u/stvx_u
  756     lvx_u       $in2,$x20,$inp
  757      le?vperm   $in0,$in0,$in0,$inpperm
  758     lvx_u       $in3,$x30,$inp
  759      le?vperm   $in1,$in1,$in1,$inpperm
  760     lvx_u       $in4,$x40,$inp
  761      le?vperm   $in2,$in2,$in2,$inpperm
  762     vxor        $out0,$in0,$rndkey0
  763     lvx_u       $in5,$x50,$inp
  764      le?vperm   $in3,$in3,$in3,$inpperm
  765     vxor        $out1,$in1,$rndkey0
  766     lvx_u       $in6,$x60,$inp
  767      le?vperm   $in4,$in4,$in4,$inpperm
  768     vxor        $out2,$in2,$rndkey0
  769     lvx_u       $in7,$x70,$inp
  770     addi        $inp,$inp,0x80
  771      le?vperm   $in5,$in5,$in5,$inpperm
  772     vxor        $out3,$in3,$rndkey0
  773      le?vperm   $in6,$in6,$in6,$inpperm
  774     vxor        $out4,$in4,$rndkey0
  775      le?vperm   $in7,$in7,$in7,$inpperm
  776     vxor        $out5,$in5,$rndkey0
  777     vxor        $out6,$in6,$rndkey0
  778     vxor        $out7,$in7,$rndkey0
  779 
  780     mtctr       $rounds
  781     b       Loop_cbc_dec8x
  782 .align  5
  783 Loop_cbc_dec8x:
  784     vncipher    $out0,$out0,v24
  785     vncipher    $out1,$out1,v24
  786     vncipher    $out2,$out2,v24
  787     vncipher    $out3,$out3,v24
  788     vncipher    $out4,$out4,v24
  789     vncipher    $out5,$out5,v24
  790     vncipher    $out6,$out6,v24
  791     vncipher    $out7,$out7,v24
  792     lvx     v24,$x20,$key_      # round[3]
  793     addi        $key_,$key_,0x20
  794 
  795     vncipher    $out0,$out0,v25
  796     vncipher    $out1,$out1,v25
  797     vncipher    $out2,$out2,v25
  798     vncipher    $out3,$out3,v25
  799     vncipher    $out4,$out4,v25
  800     vncipher    $out5,$out5,v25
  801     vncipher    $out6,$out6,v25
  802     vncipher    $out7,$out7,v25
  803     lvx     v25,$x10,$key_      # round[4]
  804     bdnz        Loop_cbc_dec8x
  805 
  806     subic       $len,$len,128       # $len-=128
  807     vncipher    $out0,$out0,v24
  808     vncipher    $out1,$out1,v24
  809     vncipher    $out2,$out2,v24
  810     vncipher    $out3,$out3,v24
  811     vncipher    $out4,$out4,v24
  812     vncipher    $out5,$out5,v24
  813     vncipher    $out6,$out6,v24
  814     vncipher    $out7,$out7,v24
  815 
  816     subfe.      r0,r0,r0        # borrow?-1:0
  817     vncipher    $out0,$out0,v25
  818     vncipher    $out1,$out1,v25
  819     vncipher    $out2,$out2,v25
  820     vncipher    $out3,$out3,v25
  821     vncipher    $out4,$out4,v25
  822     vncipher    $out5,$out5,v25
  823     vncipher    $out6,$out6,v25
  824     vncipher    $out7,$out7,v25
  825 
  826     and     r0,r0,$len
  827     vncipher    $out0,$out0,v26
  828     vncipher    $out1,$out1,v26
  829     vncipher    $out2,$out2,v26
  830     vncipher    $out3,$out3,v26
  831     vncipher    $out4,$out4,v26
  832     vncipher    $out5,$out5,v26
  833     vncipher    $out6,$out6,v26
  834     vncipher    $out7,$out7,v26
  835 
  836     add     $inp,$inp,r0        # $inp is adjusted in such
  837                         # way that at exit from the
  838                         # loop inX-in7 are loaded
  839                         # with last "words"
  840     vncipher    $out0,$out0,v27
  841     vncipher    $out1,$out1,v27
  842     vncipher    $out2,$out2,v27
  843     vncipher    $out3,$out3,v27
  844     vncipher    $out4,$out4,v27
  845     vncipher    $out5,$out5,v27
  846     vncipher    $out6,$out6,v27
  847     vncipher    $out7,$out7,v27
  848 
  849     addi        $key_,$sp,$FRAME+15 # rewind $key_
  850     vncipher    $out0,$out0,v28
  851     vncipher    $out1,$out1,v28
  852     vncipher    $out2,$out2,v28
  853     vncipher    $out3,$out3,v28
  854     vncipher    $out4,$out4,v28
  855     vncipher    $out5,$out5,v28
  856     vncipher    $out6,$out6,v28
  857     vncipher    $out7,$out7,v28
  858     lvx     v24,$x00,$key_      # re-pre-load round[1]
  859 
  860     vncipher    $out0,$out0,v29
  861     vncipher    $out1,$out1,v29
  862     vncipher    $out2,$out2,v29
  863     vncipher    $out3,$out3,v29
  864     vncipher    $out4,$out4,v29
  865     vncipher    $out5,$out5,v29
  866     vncipher    $out6,$out6,v29
  867     vncipher    $out7,$out7,v29
  868     lvx     v25,$x10,$key_      # re-pre-load round[2]
  869 
  870     vncipher    $out0,$out0,v30
  871      vxor       $ivec,$ivec,v31     # xor with last round key
  872     vncipher    $out1,$out1,v30
  873      vxor       $in0,$in0,v31
  874     vncipher    $out2,$out2,v30
  875      vxor       $in1,$in1,v31
  876     vncipher    $out3,$out3,v30
  877      vxor       $in2,$in2,v31
  878     vncipher    $out4,$out4,v30
  879      vxor       $in3,$in3,v31
  880     vncipher    $out5,$out5,v30
  881      vxor       $in4,$in4,v31
  882     vncipher    $out6,$out6,v30
  883      vxor       $in5,$in5,v31
  884     vncipher    $out7,$out7,v30
  885      vxor       $in6,$in6,v31
  886 
  887     vncipherlast    $out0,$out0,$ivec
  888     vncipherlast    $out1,$out1,$in0
  889      lvx_u      $in0,$x00,$inp      # load next input block
  890     vncipherlast    $out2,$out2,$in1
  891      lvx_u      $in1,$x10,$inp
  892     vncipherlast    $out3,$out3,$in2
  893      le?vperm   $in0,$in0,$in0,$inpperm
  894      lvx_u      $in2,$x20,$inp
  895     vncipherlast    $out4,$out4,$in3
  896      le?vperm   $in1,$in1,$in1,$inpperm
  897      lvx_u      $in3,$x30,$inp
  898     vncipherlast    $out5,$out5,$in4
  899      le?vperm   $in2,$in2,$in2,$inpperm
  900      lvx_u      $in4,$x40,$inp
  901     vncipherlast    $out6,$out6,$in5
  902      le?vperm   $in3,$in3,$in3,$inpperm
  903      lvx_u      $in5,$x50,$inp
  904     vncipherlast    $out7,$out7,$in6
  905      le?vperm   $in4,$in4,$in4,$inpperm
  906      lvx_u      $in6,$x60,$inp
  907     vmr     $ivec,$in7
  908      le?vperm   $in5,$in5,$in5,$inpperm
  909      lvx_u      $in7,$x70,$inp
  910      addi       $inp,$inp,0x80
  911 
  912     le?vperm    $out0,$out0,$out0,$inpperm
  913     le?vperm    $out1,$out1,$out1,$inpperm
  914     stvx_u      $out0,$x00,$out
  915      le?vperm   $in6,$in6,$in6,$inpperm
  916      vxor       $out0,$in0,$rndkey0
  917     le?vperm    $out2,$out2,$out2,$inpperm
  918     stvx_u      $out1,$x10,$out
  919      le?vperm   $in7,$in7,$in7,$inpperm
  920      vxor       $out1,$in1,$rndkey0
  921     le?vperm    $out3,$out3,$out3,$inpperm
  922     stvx_u      $out2,$x20,$out
  923      vxor       $out2,$in2,$rndkey0
  924     le?vperm    $out4,$out4,$out4,$inpperm
  925     stvx_u      $out3,$x30,$out
  926      vxor       $out3,$in3,$rndkey0
  927     le?vperm    $out5,$out5,$out5,$inpperm
  928     stvx_u      $out4,$x40,$out
  929      vxor       $out4,$in4,$rndkey0
  930     le?vperm    $out6,$out6,$out6,$inpperm
  931     stvx_u      $out5,$x50,$out
  932      vxor       $out5,$in5,$rndkey0
  933     le?vperm    $out7,$out7,$out7,$inpperm
  934     stvx_u      $out6,$x60,$out
  935      vxor       $out6,$in6,$rndkey0
  936     stvx_u      $out7,$x70,$out
  937     addi        $out,$out,0x80
  938      vxor       $out7,$in7,$rndkey0
  939 
  940     mtctr       $rounds
  941     beq     Loop_cbc_dec8x      # did $len-=128 borrow?
  942 
  943     addic.      $len,$len,128
  944     beq     Lcbc_dec8x_done
  945     nop
  946     nop
  947 
  948 Loop_cbc_dec8x_tail:                # up to 7 "words" tail...
  949     vncipher    $out1,$out1,v24
  950     vncipher    $out2,$out2,v24
  951     vncipher    $out3,$out3,v24
  952     vncipher    $out4,$out4,v24
  953     vncipher    $out5,$out5,v24
  954     vncipher    $out6,$out6,v24
  955     vncipher    $out7,$out7,v24
  956     lvx     v24,$x20,$key_      # round[3]
  957     addi        $key_,$key_,0x20
  958 
  959     vncipher    $out1,$out1,v25
  960     vncipher    $out2,$out2,v25
  961     vncipher    $out3,$out3,v25
  962     vncipher    $out4,$out4,v25
  963     vncipher    $out5,$out5,v25
  964     vncipher    $out6,$out6,v25
  965     vncipher    $out7,$out7,v25
  966     lvx     v25,$x10,$key_      # round[4]
  967     bdnz        Loop_cbc_dec8x_tail
  968 
  969     vncipher    $out1,$out1,v24
  970     vncipher    $out2,$out2,v24
  971     vncipher    $out3,$out3,v24
  972     vncipher    $out4,$out4,v24
  973     vncipher    $out5,$out5,v24
  974     vncipher    $out6,$out6,v24
  975     vncipher    $out7,$out7,v24
  976 
  977     vncipher    $out1,$out1,v25
  978     vncipher    $out2,$out2,v25
  979     vncipher    $out3,$out3,v25
  980     vncipher    $out4,$out4,v25
  981     vncipher    $out5,$out5,v25
  982     vncipher    $out6,$out6,v25
  983     vncipher    $out7,$out7,v25
  984 
  985     vncipher    $out1,$out1,v26
  986     vncipher    $out2,$out2,v26
  987     vncipher    $out3,$out3,v26
  988     vncipher    $out4,$out4,v26
  989     vncipher    $out5,$out5,v26
  990     vncipher    $out6,$out6,v26
  991     vncipher    $out7,$out7,v26
  992 
  993     vncipher    $out1,$out1,v27
  994     vncipher    $out2,$out2,v27
  995     vncipher    $out3,$out3,v27
  996     vncipher    $out4,$out4,v27
  997     vncipher    $out5,$out5,v27
  998     vncipher    $out6,$out6,v27
  999     vncipher    $out7,$out7,v27
 1000 
 1001     vncipher    $out1,$out1,v28
 1002     vncipher    $out2,$out2,v28
 1003     vncipher    $out3,$out3,v28
 1004     vncipher    $out4,$out4,v28
 1005     vncipher    $out5,$out5,v28
 1006     vncipher    $out6,$out6,v28
 1007     vncipher    $out7,$out7,v28
 1008 
 1009     vncipher    $out1,$out1,v29
 1010     vncipher    $out2,$out2,v29
 1011     vncipher    $out3,$out3,v29
 1012     vncipher    $out4,$out4,v29
 1013     vncipher    $out5,$out5,v29
 1014     vncipher    $out6,$out6,v29
 1015     vncipher    $out7,$out7,v29
 1016 
 1017     vncipher    $out1,$out1,v30
 1018      vxor       $ivec,$ivec,v31     # last round key
 1019     vncipher    $out2,$out2,v30
 1020      vxor       $in1,$in1,v31
 1021     vncipher    $out3,$out3,v30
 1022      vxor       $in2,$in2,v31
 1023     vncipher    $out4,$out4,v30
 1024      vxor       $in3,$in3,v31
 1025     vncipher    $out5,$out5,v30
 1026      vxor       $in4,$in4,v31
 1027     vncipher    $out6,$out6,v30
 1028      vxor       $in5,$in5,v31
 1029     vncipher    $out7,$out7,v30
 1030      vxor       $in6,$in6,v31
 1031 
 1032     cmplwi      $len,32         # switch($len)
 1033     blt     Lcbc_dec8x_one
 1034     nop
 1035     beq     Lcbc_dec8x_two
 1036     cmplwi      $len,64
 1037     blt     Lcbc_dec8x_three
 1038     nop
 1039     beq     Lcbc_dec8x_four
 1040     cmplwi      $len,96
 1041     blt     Lcbc_dec8x_five
 1042     nop
 1043     beq     Lcbc_dec8x_six
 1044 
 1045 Lcbc_dec8x_seven:
 1046     vncipherlast    $out1,$out1,$ivec
 1047     vncipherlast    $out2,$out2,$in1
 1048     vncipherlast    $out3,$out3,$in2
 1049     vncipherlast    $out4,$out4,$in3
 1050     vncipherlast    $out5,$out5,$in4
 1051     vncipherlast    $out6,$out6,$in5
 1052     vncipherlast    $out7,$out7,$in6
 1053     vmr     $ivec,$in7
 1054 
 1055     le?vperm    $out1,$out1,$out1,$inpperm
 1056     le?vperm    $out2,$out2,$out2,$inpperm
 1057     stvx_u      $out1,$x00,$out
 1058     le?vperm    $out3,$out3,$out3,$inpperm
 1059     stvx_u      $out2,$x10,$out
 1060     le?vperm    $out4,$out4,$out4,$inpperm
 1061     stvx_u      $out3,$x20,$out
 1062     le?vperm    $out5,$out5,$out5,$inpperm
 1063     stvx_u      $out4,$x30,$out
 1064     le?vperm    $out6,$out6,$out6,$inpperm
 1065     stvx_u      $out5,$x40,$out
 1066     le?vperm    $out7,$out7,$out7,$inpperm
 1067     stvx_u      $out6,$x50,$out
 1068     stvx_u      $out7,$x60,$out
 1069     addi        $out,$out,0x70
 1070     b       Lcbc_dec8x_done
 1071 
 1072 .align  5
 1073 Lcbc_dec8x_six:
 1074     vncipherlast    $out2,$out2,$ivec
 1075     vncipherlast    $out3,$out3,$in2
 1076     vncipherlast    $out4,$out4,$in3
 1077     vncipherlast    $out5,$out5,$in4
 1078     vncipherlast    $out6,$out6,$in5
 1079     vncipherlast    $out7,$out7,$in6
 1080     vmr     $ivec,$in7
 1081 
 1082     le?vperm    $out2,$out2,$out2,$inpperm
 1083     le?vperm    $out3,$out3,$out3,$inpperm
 1084     stvx_u      $out2,$x00,$out
 1085     le?vperm    $out4,$out4,$out4,$inpperm
 1086     stvx_u      $out3,$x10,$out
 1087     le?vperm    $out5,$out5,$out5,$inpperm
 1088     stvx_u      $out4,$x20,$out
 1089     le?vperm    $out6,$out6,$out6,$inpperm
 1090     stvx_u      $out5,$x30,$out
 1091     le?vperm    $out7,$out7,$out7,$inpperm
 1092     stvx_u      $out6,$x40,$out
 1093     stvx_u      $out7,$x50,$out
 1094     addi        $out,$out,0x60
 1095     b       Lcbc_dec8x_done
 1096 
 1097 .align  5
 1098 Lcbc_dec8x_five:
 1099     vncipherlast    $out3,$out3,$ivec
 1100     vncipherlast    $out4,$out4,$in3
 1101     vncipherlast    $out5,$out5,$in4
 1102     vncipherlast    $out6,$out6,$in5
 1103     vncipherlast    $out7,$out7,$in6
 1104     vmr     $ivec,$in7
 1105 
 1106     le?vperm    $out3,$out3,$out3,$inpperm
 1107     le?vperm    $out4,$out4,$out4,$inpperm
 1108     stvx_u      $out3,$x00,$out
 1109     le?vperm    $out5,$out5,$out5,$inpperm
 1110     stvx_u      $out4,$x10,$out
 1111     le?vperm    $out6,$out6,$out6,$inpperm
 1112     stvx_u      $out5,$x20,$out
 1113     le?vperm    $out7,$out7,$out7,$inpperm
 1114     stvx_u      $out6,$x30,$out
 1115     stvx_u      $out7,$x40,$out
 1116     addi        $out,$out,0x50
 1117     b       Lcbc_dec8x_done
 1118 
 1119 .align  5
 1120 Lcbc_dec8x_four:
 1121     vncipherlast    $out4,$out4,$ivec
 1122     vncipherlast    $out5,$out5,$in4
 1123     vncipherlast    $out6,$out6,$in5
 1124     vncipherlast    $out7,$out7,$in6
 1125     vmr     $ivec,$in7
 1126 
 1127     le?vperm    $out4,$out4,$out4,$inpperm
 1128     le?vperm    $out5,$out5,$out5,$inpperm
 1129     stvx_u      $out4,$x00,$out
 1130     le?vperm    $out6,$out6,$out6,$inpperm
 1131     stvx_u      $out5,$x10,$out
 1132     le?vperm    $out7,$out7,$out7,$inpperm
 1133     stvx_u      $out6,$x20,$out
 1134     stvx_u      $out7,$x30,$out
 1135     addi        $out,$out,0x40
 1136     b       Lcbc_dec8x_done
 1137 
 1138 .align  5
 1139 Lcbc_dec8x_three:
 1140     vncipherlast    $out5,$out5,$ivec
 1141     vncipherlast    $out6,$out6,$in5
 1142     vncipherlast    $out7,$out7,$in6
 1143     vmr     $ivec,$in7
 1144 
 1145     le?vperm    $out5,$out5,$out5,$inpperm
 1146     le?vperm    $out6,$out6,$out6,$inpperm
 1147     stvx_u      $out5,$x00,$out
 1148     le?vperm    $out7,$out7,$out7,$inpperm
 1149     stvx_u      $out6,$x10,$out
 1150     stvx_u      $out7,$x20,$out
 1151     addi        $out,$out,0x30
 1152     b       Lcbc_dec8x_done
 1153 
 1154 .align  5
 1155 Lcbc_dec8x_two:
 1156     vncipherlast    $out6,$out6,$ivec
 1157     vncipherlast    $out7,$out7,$in6
 1158     vmr     $ivec,$in7
 1159 
 1160     le?vperm    $out6,$out6,$out6,$inpperm
 1161     le?vperm    $out7,$out7,$out7,$inpperm
 1162     stvx_u      $out6,$x00,$out
 1163     stvx_u      $out7,$x10,$out
 1164     addi        $out,$out,0x20
 1165     b       Lcbc_dec8x_done
 1166 
 1167 .align  5
 1168 Lcbc_dec8x_one:
 1169     vncipherlast    $out7,$out7,$ivec
 1170     vmr     $ivec,$in7
 1171 
 1172     le?vperm    $out7,$out7,$out7,$inpperm
 1173     stvx_u      $out7,0,$out
 1174     addi        $out,$out,0x10
 1175 
 1176 Lcbc_dec8x_done:
 1177     le?vperm    $ivec,$ivec,$ivec,$inpperm
 1178     stvx_u      $ivec,0,$ivp        # write [unaligned] iv
 1179 
 1180     li      r10,`$FRAME+15`
 1181     li      r11,`$FRAME+31`
 1182     stvx        $inpperm,r10,$sp    # wipe copies of round keys
 1183     addi        r10,r10,32
 1184     stvx        $inpperm,r11,$sp
 1185     addi        r11,r11,32
 1186     stvx        $inpperm,r10,$sp
 1187     addi        r10,r10,32
 1188     stvx        $inpperm,r11,$sp
 1189     addi        r11,r11,32
 1190     stvx        $inpperm,r10,$sp
 1191     addi        r10,r10,32
 1192     stvx        $inpperm,r11,$sp
 1193     addi        r11,r11,32
 1194     stvx        $inpperm,r10,$sp
 1195     addi        r10,r10,32
 1196     stvx        $inpperm,r11,$sp
 1197     addi        r11,r11,32
 1198 
 1199     mtspr       256,$vrsave
 1200     lvx     v20,r10,$sp     # ABI says so
 1201     addi        r10,r10,32
 1202     lvx     v21,r11,$sp
 1203     addi        r11,r11,32
 1204     lvx     v22,r10,$sp
 1205     addi        r10,r10,32
 1206     lvx     v23,r11,$sp
 1207     addi        r11,r11,32
 1208     lvx     v24,r10,$sp
 1209     addi        r10,r10,32
 1210     lvx     v25,r11,$sp
 1211     addi        r11,r11,32
 1212     lvx     v26,r10,$sp
 1213     addi        r10,r10,32
 1214     lvx     v27,r11,$sp
 1215     addi        r11,r11,32
 1216     lvx     v28,r10,$sp
 1217     addi        r10,r10,32
 1218     lvx     v29,r11,$sp
 1219     addi        r11,r11,32
 1220     lvx     v30,r10,$sp
 1221     lvx     v31,r11,$sp
 1222     $POP        r26,`$FRAME+21*16+0*$SIZE_T`($sp)
 1223     $POP        r27,`$FRAME+21*16+1*$SIZE_T`($sp)
 1224     $POP        r28,`$FRAME+21*16+2*$SIZE_T`($sp)
 1225     $POP        r29,`$FRAME+21*16+3*$SIZE_T`($sp)
 1226     $POP        r30,`$FRAME+21*16+4*$SIZE_T`($sp)
 1227     $POP        r31,`$FRAME+21*16+5*$SIZE_T`($sp)
 1228     addi        $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
 1229     blr
 1230     .long       0
 1231     .byte       0,12,0x04,0,0x80,6,6,0
 1232     .long       0
 1233 .size   .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
 1234 ___
 1235 }}  }}}
 1236 
 1237 #########################################################################
 1238 {{{ # CTR procedure[s]                      #
# Register assignments for the CTR32 entry point.  GPRs r3..r10 carry
# the C arguments (inp, out, len, key, ivp) plus scratch offsets and
# the round count; vector registers v0..v11 hold the two rotating
# round keys, the working block, the counter vector, the lookahead
# input vector and the alignment permutations.
 1239 my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
 1240 my ($rndkey0,$rndkey1,$inout,$tmp)=     map("v$_",(0..3));
 1241 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
 1242                         map("v$_",(4..11));
 1243 my $dat=$tmp;       # scratch alias, reuses v3
 1245 $code.=<<___;
 1246 .globl  .${prefix}_ctr32_encrypt_blocks
 1247 .align  5
 1248 .${prefix}_ctr32_encrypt_blocks:
# AES-CTR with 32-bit counter: the last word of the counter vector is
# incremented per block.  This scalar path processes one block per
# Loop_ctr32_enc pass; lengths of 8 or more blocks branch to the 8x
# interleaved path below.
 1249     ${UCMP}i    $len,1
 1250     bltlr-
 1251 
# save vrsave and enable the vector registers used by this routine
 1252     lis     r0,0xfff0
 1253     mfspr       $vrsave,256
 1254     mtspr       256,r0
 1255 
 1256     li      $idx,15
 1257     vxor        $rndkey0,$rndkey0,$rndkey0
 1258     le?vspltisb $tmp,0x0f
 1259 
 1260     lvx     $ivec,0,$ivp        # load [unaligned] iv
 1261     lvsl        $inpperm,0,$ivp
 1262     lvx     $inptail,$idx,$ivp
 1263      vspltisb   $one,1
 1264     le?vxor     $inpperm,$inpperm,$tmp
 1265     vperm       $ivec,$ivec,$inptail,$inpperm
# build the increment vector: a single 1 in the last byte, so the
# word-wise add below bumps only the low word of the counter
 1266      vsldoi     $one,$rndkey0,$one,1
 1267 
 1268     neg     r11,$inp
 1269     ?lvsl       $keyperm,0,$key     # prepare for unaligned key
 1270     lwz     $rounds,240($key)
 1271 
 1272     lvsr        $inpperm,0,r11      # prepare for unaligned load
 1273     lvx     $inptail,0,$inp
 1274     addi        $inp,$inp,15        # 15 is not typo
 1275     le?vxor     $inpperm,$inpperm,$tmp
 1276 
# the round loop applies two rounds per pass, with the final two
# rounds peeled off after it: hence rounds/2 - 1 iterations in CTR
 1277     srwi        $rounds,$rounds,1
 1278     li      $idx,16
 1279     subi        $rounds,$rounds,1
 1280 
 1281     ${UCMP}i    $len,8
 1282     bge     _aesp8_ctr32_encrypt8x
 1283 
 1284     ?lvsr       $outperm,0,$out     # prepare for unaligned store
 1285     vspltisb    $outmask,-1
 1286     lvx     $outhead,0,$out
 1287     ?vperm      $outmask,$rndkey0,$outmask,$outperm
 1288     le?vxor     $outperm,$outperm,$tmp
 1289 
 1290     lvx     $rndkey0,0,$key
 1291     mtctr       $rounds
 1292     lvx     $rndkey1,$idx,$key
 1293     addi        $idx,$idx,16
 1294     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
 1295     vxor        $inout,$ivec,$rndkey0
 1296     lvx     $rndkey0,$idx,$key
 1297     addi        $idx,$idx,16
 1298     b       Loop_ctr32_enc
 1299 
 1300 .align  5
 1301 Loop_ctr32_enc:
# two rounds per iteration; round keys stream through the v1/v0 pair
 1302     ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
 1303     vcipher     $inout,$inout,$rndkey1
 1304     lvx     $rndkey1,$idx,$key
 1305     addi        $idx,$idx,16
 1306     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
 1307     vcipher     $inout,$inout,$rndkey0
 1308     lvx     $rndkey0,$idx,$key
 1309     addi        $idx,$idx,16
 1310     bdnz        Loop_ctr32_enc
 1311 
# out of the round loop: bump the counter and pull in the next input
# block while the last two rounds and vcipherlast complete
 1312     vadduwm     $ivec,$ivec,$one
 1313      vmr        $dat,$inptail
 1314      lvx        $inptail,0,$inp
 1315      addi       $inp,$inp,16
 1316      subic.     $len,$len,1     # blocks--
 1317 
 1318     ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
 1319     vcipher     $inout,$inout,$rndkey1
 1320     lvx     $rndkey1,$idx,$key
 1321      vperm      $dat,$dat,$inptail,$inpperm
 1322      li     $idx,16
 1323     ?vperm      $rndkey1,$rndkey0,$rndkey1,$keyperm
 1324      lvx        $rndkey0,0,$key
# fold the input block into the last round key so that vcipherlast
# both finishes the round and xors in the plaintext
 1325     vxor        $dat,$dat,$rndkey1  # last round key
 1326     vcipherlast $inout,$inout,$dat
 1327 
# reload round keys for the next block and store via the unaligned
# head/mask machinery set up above
 1328      lvx        $rndkey1,$idx,$key
 1329      addi       $idx,$idx,16
 1330     vperm       $inout,$inout,$inout,$outperm
 1331     vsel        $dat,$outhead,$inout,$outmask
 1332      mtctr      $rounds
 1333      ?vperm     $rndkey0,$rndkey0,$rndkey1,$keyperm
 1334     vmr     $outhead,$inout
 1335      vxor       $inout,$ivec,$rndkey0
 1336      lvx        $rndkey0,$idx,$key
 1337      addi       $idx,$idx,16
 1338     stvx        $dat,0,$out
 1339     addi        $out,$out,16
 1340     bne     Loop_ctr32_enc
 1341 
# flush the final partially-assembled output vector (unaligned tail)
 1342     addi        $out,$out,-1
 1343     lvx     $inout,0,$out       # redundant in aligned case
 1344     vsel        $inout,$outhead,$inout,$outmask
 1345     stvx        $inout,0,$out
 1346 
 1347     mtspr       256,$vrsave
 1348     blr
 1349     .long       0
 1350     .byte       0,12,0x14,0,0,0,6,0
 1351     .long       0
 1352 ___
 1353 #########################################################################
 1354 {{  # Optimized CTR procedure                   #
# 8x interleaved CTR path: eight counter blocks are kept in flight to
# hide vcipher latency.  GPRs r0/r8/r26-r31 hold the block offsets
# 0x00-0x70; v0-v22 carry the eight input and eight output blocks.
 1355 my $key_="r11";
 1356 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
 1357     $x00=0 if ($flavour =~ /osx/);  # osx flavour: literal 0 in place of r0
 1358 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
 1359 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
 1360 my $rndkey0="v23";  # v24-v25 rotating buffer for first round keys
 1361             # v26-v31 last 6 round keys
 1362 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
 1363 my ($two,$three,$four)=($outhead,$outperm,$outmask);
 1364 
 1365 $code.=<<___;
 1366 .align  5
 1367 _aesp8_ctr32_encrypt8x:
 1368     $STU        $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
 1369     li      r10,`$FRAME+8*16+15`
 1370     li      r11,`$FRAME+8*16+31`
 1371     stvx        v20,r10,$sp     # ABI says so
 1372     addi        r10,r10,32
 1373     stvx        v21,r11,$sp
 1374     addi        r11,r11,32
 1375     stvx        v22,r10,$sp
 1376     addi        r10,r10,32
 1377     stvx        v23,r11,$sp
 1378     addi        r11,r11,32
 1379     stvx        v24,r10,$sp
 1380     addi        r10,r10,32
 1381     stvx        v25,r11,$sp
 1382     addi        r11,r11,32
 1383     stvx        v26,r10,$sp
 1384     addi        r10,r10,32
 1385     stvx        v27,r11,$sp
 1386     addi        r11,r11,32
 1387     stvx        v28,r10,$sp
 1388     addi        r10,r10,32
 1389     stvx        v29,r11,$sp
 1390     addi        r11,r11,32
 1391     stvx        v30,r10,$sp
 1392     stvx        v31,r11,$sp
 1393     li      r0,-1
 1394     stw     $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
 1395     li      $x10,0x10
 1396     $PUSH       r26,`$FRAME+21*16+0*$SIZE_T`($sp)
 1397     li      $x20,0x20
 1398     $PUSH       r27,`$FRAME+21*16+1*$SIZE_T`($sp)
 1399     li      $x30,0x30
 1400     $PUSH       r28,`$FRAME+21*16+2*$SIZE_T`($sp)
 1401     li      $x40,0x40
 1402     $PUSH       r29,`$FRAME+21*16+3*$SIZE_T`($sp)
 1403     li      $x50,0x50
 1404     $PUSH       r30,`$FRAME+21*16+4*$SIZE_T`($sp)
 1405     li      $x60,0x60
 1406     $PUSH       r31,`$FRAME+21*16+5*$SIZE_T`($sp)
 1407     li      $x70,0x70
 1408     mtspr       256,r0
 1409 
 1410     subi        $rounds,$rounds,3   # -4 in total
 1411 
 1412     lvx     $rndkey0,$x00,$key  # load key schedule
 1413     lvx     v30,$x10,$key
 1414     addi        $key,$key,0x20
 1415     lvx     v31,$x00,$key
 1416     ?vperm      $rndkey0,$rndkey0,v30,$keyperm
 1417     addi        $key_,$sp,$FRAME+15
 1418     mtctr       $rounds
 1419 
 1420 Load_ctr32_enc_key:
 1421     ?vperm      v24,v30,v31,$keyperm
 1422     lvx     v30,$x10,$key
 1423     addi        $key,$key,0x20
 1424     stvx        v24,$x00,$key_      # off-load round[1]
 1425     ?vperm      v25,v31,v30,$keyperm
 1426     lvx     v31,$x00,$key
 1427     stvx        v25,$x10,$key_      # off-load round[2]
 1428     addi        $key_,$key_,0x20
 1429     bdnz        Load_ctr32_enc_key
 1430 
 1431     lvx     v26,$x10,$key
 1432     ?vperm      v24,v30,v31,$keyperm
 1433     lvx     v27,$x20,$key
 1434     stvx        v24,$x00,$key_      # off-load round[3]
 1435     ?vperm      v25,v31,v26,$keyperm
 1436     lvx     v28,$x30,$key
 1437     stvx        v25,$x10,$key_      # off-load round[4]
 1438     addi        $key_,$sp,$FRAME+15 # rewind $key_
 1439     ?vperm      v26,v26,v27,$keyperm
 1440     lvx     v29,$x40,$key
 1441     ?vperm      v27,v27,v28,$keyperm
 1442     lvx     v30,$x50,$key
 1443     ?vperm      v28,v28,v29,$keyperm
 1444     lvx     v31,$x60,$key
 1445     ?vperm      v29,v29,v30,$keyperm
 1446     lvx     $out0,$x70,$key     # borrow $out0
 1447     ?vperm      v30,v30,v31,$keyperm
 1448     lvx     v24,$x00,$key_      # pre-load round[1]
 1449     ?vperm      v31,v31,$out0,$keyperm
 1450     lvx     v25,$x10,$key_      # pre-load round[2]
 1451 
 1452     vadduwm     $two,$one,$one
 1453     subi        $inp,$inp,15        # undo "caller"
 1454     $SHL        $len,$len,4
 1455 
 1456     vadduwm     $out1,$ivec,$one    # counter values ...
 1457     vadduwm     $out2,$ivec,$two
 1458     vxor        $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
 1459      le?li      $idx,8
 1460     vadduwm     $out3,$out1,$two
 1461     vxor        $out1,$out1,$rndkey0
 1462      le?lvsl    $inpperm,0,$idx
 1463     vadduwm     $out4,$out2,$two
 1464     vxor        $out2,$out2,$rndkey0
 1465      le?vspltisb    $tmp,0x0f
 1466     vadduwm     $out5,$out3,$two
 1467     vxor        $out3,$out3,$rndkey0
 1468      le?vxor    $inpperm,$inpperm,$tmp  # transform for lvx_u/stvx_u
 1469     vadduwm     $out6,$out4,$two
 1470     vxor        $out4,$out4,$rndkey0
 1471     vadduwm     $out7,$out5,$two
 1472     vxor        $out5,$out5,$rndkey0
 1473     vadduwm     $ivec,$out6,$two    # next counter value
 1474     vxor        $out6,$out6,$rndkey0
 1475     vxor        $out7,$out7,$rndkey0
 1476 
 1477     mtctr       $rounds
 1478     b       Loop_ctr32_enc8x
 1479 .align  5
 1480 Loop_ctr32_enc8x:
 1481     vcipher     $out0,$out0,v24
 1482     vcipher     $out1,$out1,v24
 1483     vcipher     $out2,$out2,v24
 1484     vcipher     $out3,$out3,v24
 1485     vcipher     $out4,$out4,v24
 1486     vcipher     $out5,$out5,v24
 1487     vcipher     $out6,$out6,v24
 1488     vcipher     $out7,$out7,v24
 1489 Loop_ctr32_enc8x_middle:
 1490     lvx     v24,$x20,$key_      # round[3]
 1491     addi        $key_,$key_,0x20
 1492 
 1493     vcipher     $out0,$out0,v25
 1494     vcipher     $out1,$out1,v25
 1495     vcipher     $out2,$out2,v25
 1496     vcipher     $out3,$out3,v25
 1497     vcipher     $out4,$out4,v25
 1498     vcipher     $out5,$out5,v25
 1499     vcipher     $out6,$out6,v25
 1500     vcipher     $out7,$out7,v25
 1501     lvx     v25,$x10,$key_      # round[4]
 1502     bdnz        Loop_ctr32_enc8x
 1503 
 1504     subic       r11,$len,256        # $len-256, borrow $key_
 1505     vcipher     $out0,$out0,v24
 1506     vcipher     $out1,$out1,v24
 1507     vcipher     $out2,$out2,v24
 1508     vcipher     $out3,$out3,v24
 1509     vcipher     $out4,$out4,v24
 1510     vcipher     $out5,$out5,v24
 1511     vcipher     $out6,$out6,v24
 1512     vcipher     $out7,$out7,v24
 1513 
 1514     subfe       r0,r0,r0        # borrow?-1:0
 1515     vcipher     $out0,$out0,v25
 1516     vcipher     $out1,$out1,v25
 1517     vcipher     $out2,$out2,v25
 1518     vcipher     $out3,$out3,v25
 1519     vcipher     $out4,$out4,v25
 1520     vcipher     $out5,$out5,v25
 1521     vcipher     $out6,$out6,v25
 1522     vcipher     $out7,$out7,v25
 1523 
 1524     and     r0,r0,r11
 1525     addi        $key_,$sp,$FRAME+15 # rewind $key_
 1526     vcipher     $out0,$out0,v26
 1527     vcipher     $out1,$out1,v26
 1528     vcipher     $out2,$out2,v26
 1529     vcipher     $out3,$out3,v26
 1530     vcipher     $out4,$out4,v26
 1531     vcipher     $out5,$out5,v26
 1532     vcipher     $out6,$out6,v26
 1533     vcipher     $out7,$out7,v26
 1534     lvx     v24,$x00,$key_      # re-pre-load round[1]
 1535 
 1536     subic       $len,$len,129       # $len-=129
 1537     vcipher     $out0,$out0,v27
 1538     addi        $len,$len,1     # $len-=128 really
 1539     vcipher     $out1,$out1,v27
 1540     vcipher     $out2,$out2,v27
 1541     vcipher     $out3,$out3,v27
 1542     vcipher     $out4,$out4,v27
 1543     vcipher     $out5,$out5,v27
 1544     vcipher     $out6,$out6,v27
 1545     vcipher     $out7,$out7,v27
 1546     lvx     v25,$x10,$key_      # re-pre-load round[2]
 1547 
 1548     vcipher     $out0,$out0,v28
 1549      lvx_u      $in0,$x00,$inp      # load input
 1550     vcipher     $out1,$out1,v28
 1551      lvx_u      $in1,$x10,$inp
 1552     vcipher     $out2,$out2,v28
 1553      lvx_u      $in2,$x20,$inp
 1554     vcipher     $out3,$out3,v28
 1555      lvx_u      $in3,$x30,$inp
 1556     vcipher     $out4,$out4,v28
 1557      lvx_u      $in4,$x40,$inp
 1558     vcipher     $out5,$out5,v28
 1559      lvx_u      $in5,$x50,$inp
 1560     vcipher     $out6,$out6,v28
 1561      lvx_u      $in6,$x60,$inp
 1562     vcipher     $out7,$out7,v28
 1563      lvx_u      $in7,$x70,$inp
 1564      addi       $inp,$inp,0x80
 1565 
 1566     vcipher     $out0,$out0,v29
 1567      le?vperm   $in0,$in0,$in0,$inpperm
 1568     vcipher     $out1,$out1,v29
 1569      le?vperm   $in1,$in1,$in1,$inpperm
 1570     vcipher     $out2,$out2,v29
 1571      le?vperm   $in2,$in2,$in2,$inpperm
 1572     vcipher     $out3,$out3,v29
 1573      le?vperm   $in3,$in3,$in3,$inpperm
 1574     vcipher     $out4,$out4,v29
 1575      le?vperm   $in4,$in4,$in4,$inpperm
 1576     vcipher     $out5,$out5,v29
 1577      le?vperm   $in5,$in5,$in5,$inpperm
 1578     vcipher     $out6,$out6,v29
 1579      le?vperm   $in6,$in6,$in6,$inpperm
 1580     vcipher     $out7,$out7,v29
 1581      le?vperm   $in7,$in7,$in7,$inpperm
 1582 
 1583     add     $inp,$inp,r0        # $inp is adjusted in such
 1584                         # way that at exit from the
 1585                         # loop inX-in7 are loaded
 1586                         # with last "words"
 1587     subfe.      r0,r0,r0        # borrow?-1:0
 1588     vcipher     $out0,$out0,v30
 1589      vxor       $in0,$in0,v31       # xor with last round key
 1590     vcipher     $out1,$out1,v30
 1591      vxor       $in1,$in1,v31
 1592     vcipher     $out2,$out2,v30
 1593      vxor       $in2,$in2,v31
 1594     vcipher     $out3,$out3,v30
 1595      vxor       $in3,$in3,v31
 1596     vcipher     $out4,$out4,v30
 1597      vxor       $in4,$in4,v31
 1598     vcipher     $out5,$out5,v30
 1599      vxor       $in5,$in5,v31
 1600     vcipher     $out6,$out6,v30
 1601      vxor       $in6,$in6,v31
 1602     vcipher     $out7,$out7,v30
 1603      vxor       $in7,$in7,v31
 1604 
 1605     bne     Lctr32_enc8x_break  # did $len-129 borrow?
 1606 
 1607     vcipherlast $in0,$out0,$in0
 1608     vcipherlast $in1,$out1,$in1
 1609      vadduwm    $out1,$ivec,$one    # counter values ...
 1610     vcipherlast $in2,$out2,$in2
 1611      vadduwm    $out2,$ivec,$two
 1612      vxor       $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
 1613     vcipherlast $in3,$out3,$in3
 1614      vadduwm    $out3,$out1,$two
 1615      vxor       $out1,$out1,$rndkey0
 1616     vcipherlast $in4,$out4,$in4
 1617      vadduwm    $out4,$out2,$two
 1618      vxor       $out2,$out2,$rndkey0
 1619     vcipherlast $in5,$out5,$in5
 1620      vadduwm    $out5,$out3,$two
 1621      vxor       $out3,$out3,$rndkey0
 1622     vcipherlast $in6,$out6,$in6
 1623      vadduwm    $out6,$out4,$two
 1624      vxor       $out4,$out4,$rndkey0
 1625     vcipherlast $in7,$out7,$in7
 1626      vadduwm    $out7,$out5,$two
 1627      vxor       $out5,$out5,$rndkey0
 1628     le?vperm    $in0,$in0,$in0,$inpperm
 1629      vadduwm    $ivec,$out6,$two    # next counter value
 1630      vxor       $out6,$out6,$rndkey0
 1631     le?vperm    $in1,$in1,$in1,$inpperm
 1632      vxor       $out7,$out7,$rndkey0
 1633     mtctr       $rounds
 1634 
 1635      vcipher    $out0,$out0,v24
 1636     stvx_u      $in0,$x00,$out
 1637     le?vperm    $in2,$in2,$in2,$inpperm
 1638      vcipher    $out1,$out1,v24
 1639     stvx_u      $in1,$x10,$out
 1640     le?vperm    $in3,$in3,$in3,$inpperm
 1641      vcipher    $out2,$out2,v24
 1642     stvx_u      $in2,$x20,$out
 1643     le?vperm    $in4,$in4,$in4,$inpperm
 1644      vcipher    $out3,$out3,v24
 1645     stvx_u      $in3,$x30,$out
 1646     le?vperm    $in5,$in5,$in5,$inpperm
 1647      vcipher    $out4,$out4,v24
 1648     stvx_u      $in4,$x40,$out
 1649     le?vperm    $in6,$in6,$in6,$inpperm
 1650      vcipher    $out5,$out5,v24
 1651     stvx_u      $in5,$x50,$out
 1652     le?vperm    $in7,$in7,$in7,$inpperm
 1653      vcipher    $out6,$out6,v24
 1654     stvx_u      $in6,$x60,$out
 1655      vcipher    $out7,$out7,v24
 1656     stvx_u      $in7,$x70,$out
 1657     addi        $out,$out,0x80
 1658 
 1659     b       Loop_ctr32_enc8x_middle
 1660 
 1661 .align  5
 1662 Lctr32_enc8x_break:
 1663     cmpwi       $len,-0x60
 1664     blt     Lctr32_enc8x_one
 1665     nop
 1666     beq     Lctr32_enc8x_two
 1667     cmpwi       $len,-0x40
 1668     blt     Lctr32_enc8x_three
 1669     nop
 1670     beq     Lctr32_enc8x_four
 1671     cmpwi       $len,-0x20
 1672     blt     Lctr32_enc8x_five
 1673     nop
 1674     beq     Lctr32_enc8x_six
 1675     cmpwi       $len,0x00
 1676     blt     Lctr32_enc8x_seven
 1677 
 1678 Lctr32_enc8x_eight:
 1679     vcipherlast $out0,$out0,$in0
 1680     vcipherlast $out1,$out1,$in1
 1681     vcipherlast $out2,$out2,$in2
 1682     vcipherlast $out3,$out3,$in3
 1683     vcipherlast $out4,$out4,$in4
 1684     vcipherlast $out5,$out5,$in5
 1685     vcipherlast $out6,$out6,$in6
 1686     vcipherlast $out7,$out7,$in7
 1687 
 1688     le?vperm    $out0,$out0,$out0,$inpperm
 1689     le?vperm    $out1,$out1,$out1,$inpperm
 1690     stvx_u      $out0,$x00,$out
 1691     le?vperm    $out2,$out2,$out2,$inpperm
 1692     stvx_u      $out1,$x10,$out
 1693     le?vperm    $out3,$out3,$out3,$inpperm
 1694     stvx_u      $out2,$x20,$out
 1695     le?vperm    $out4,$out4,$out4,$inpperm
 1696     stvx_u      $out3,$x30,$out
 1697     le?vperm    $out5,$out5,$out5,$inpperm
 1698     stvx_u      $out4,$x40,$out
 1699     le?vperm    $out6,$out6,$out6,$inpperm
 1700     stvx_u      $out5,$x50,$out
 1701     le?vperm    $out7,$out7,$out7,$inpperm
 1702     stvx_u      $out6,$x60,$out
 1703     stvx_u      $out7,$x70,$out
 1704     addi        $out,$out,0x80
 1705     b       Lctr32_enc8x_done
 1706 
 1707 .align  5
 1708 Lctr32_enc8x_seven:             # flush 7 blocks; plaintext sits tail-aligned in in1..in7
 1709     vcipherlast $out0,$out0,$in1
 1710     vcipherlast $out1,$out1,$in2
 1711     vcipherlast $out2,$out2,$in3
 1712     vcipherlast $out3,$out3,$in4
 1713     vcipherlast $out4,$out4,$in5
 1714     vcipherlast $out5,$out5,$in6
 1715     vcipherlast $out6,$out6,$in7
 1716 
 1717     le?vperm    $out0,$out0,$out0,$inpperm
 1718     le?vperm    $out1,$out1,$out1,$inpperm
 1719     stvx_u      $out0,$x00,$out
 1720     le?vperm    $out2,$out2,$out2,$inpperm
 1721     stvx_u      $out1,$x10,$out
 1722     le?vperm    $out3,$out3,$out3,$inpperm
 1723     stvx_u      $out2,$x20,$out
 1724     le?vperm    $out4,$out4,$out4,$inpperm
 1725     stvx_u      $out3,$x30,$out
 1726     le?vperm    $out5,$out5,$out5,$inpperm
 1727     stvx_u      $out4,$x40,$out
 1728     le?vperm    $out6,$out6,$out6,$inpperm
 1729     stvx_u      $out5,$x50,$out
 1730     stvx_u      $out6,$x60,$out
 1731     addi        $out,$out,0x70      # 7 times 16 bytes written
 1732     b       Lctr32_enc8x_done
 1733 
 1734 .align  5
 1735 Lctr32_enc8x_six:               # flush 6 blocks; plaintext in in2..in7
 1736     vcipherlast $out0,$out0,$in2
 1737     vcipherlast $out1,$out1,$in3
 1738     vcipherlast $out2,$out2,$in4
 1739     vcipherlast $out3,$out3,$in5
 1740     vcipherlast $out4,$out4,$in6
 1741     vcipherlast $out5,$out5,$in7
 1742 
 1743     le?vperm    $out0,$out0,$out0,$inpperm
 1744     le?vperm    $out1,$out1,$out1,$inpperm
 1745     stvx_u      $out0,$x00,$out
 1746     le?vperm    $out2,$out2,$out2,$inpperm
 1747     stvx_u      $out1,$x10,$out
 1748     le?vperm    $out3,$out3,$out3,$inpperm
 1749     stvx_u      $out2,$x20,$out
 1750     le?vperm    $out4,$out4,$out4,$inpperm
 1751     stvx_u      $out3,$x30,$out
 1752     le?vperm    $out5,$out5,$out5,$inpperm
 1753     stvx_u      $out4,$x40,$out
 1754     stvx_u      $out5,$x50,$out
 1755     addi        $out,$out,0x60      # 6 times 16 bytes written
 1756     b       Lctr32_enc8x_done
 1757 
 1758 .align  5
 1759 Lctr32_enc8x_five:              # flush 5 blocks; plaintext in in3..in7
 1760     vcipherlast $out0,$out0,$in3
 1761     vcipherlast $out1,$out1,$in4
 1762     vcipherlast $out2,$out2,$in5
 1763     vcipherlast $out3,$out3,$in6
 1764     vcipherlast $out4,$out4,$in7
 1765 
 1766     le?vperm    $out0,$out0,$out0,$inpperm
 1767     le?vperm    $out1,$out1,$out1,$inpperm
 1768     stvx_u      $out0,$x00,$out
 1769     le?vperm    $out2,$out2,$out2,$inpperm
 1770     stvx_u      $out1,$x10,$out
 1771     le?vperm    $out3,$out3,$out3,$inpperm
 1772     stvx_u      $out2,$x20,$out
 1773     le?vperm    $out4,$out4,$out4,$inpperm
 1774     stvx_u      $out3,$x30,$out
 1775     stvx_u      $out4,$x40,$out
 1776     addi        $out,$out,0x50      # 5 times 16 bytes written
 1777     b       Lctr32_enc8x_done
 1778 
 1779 .align  5
 1780 Lctr32_enc8x_four:              # flush 4 blocks; plaintext in in4..in7
 1781     vcipherlast $out0,$out0,$in4
 1782     vcipherlast $out1,$out1,$in5
 1783     vcipherlast $out2,$out2,$in6
 1784     vcipherlast $out3,$out3,$in7
 1785 
 1786     le?vperm    $out0,$out0,$out0,$inpperm
 1787     le?vperm    $out1,$out1,$out1,$inpperm
 1788     stvx_u      $out0,$x00,$out
 1789     le?vperm    $out2,$out2,$out2,$inpperm
 1790     stvx_u      $out1,$x10,$out
 1791     le?vperm    $out3,$out3,$out3,$inpperm
 1792     stvx_u      $out2,$x20,$out
 1793     stvx_u      $out3,$x30,$out
 1794     addi        $out,$out,0x40      # 4 times 16 bytes written
 1795     b       Lctr32_enc8x_done
 1796 
 1797 .align  5
 1798 Lctr32_enc8x_three:             # flush 3 blocks; plaintext in in5..in7
 # vcipherlast takes the plaintext block as its round-key operand, which xors
 # the plaintext into the keystream while performing the final AES round.
 1799     vcipherlast $out0,$out0,$in5
 1800     vcipherlast $out1,$out1,$in6
 1801     vcipherlast $out2,$out2,$in7
 1802 
 1803     le?vperm    $out0,$out0,$out0,$inpperm
 1804     le?vperm    $out1,$out1,$out1,$inpperm
 1805     stvx_u      $out0,$x00,$out
 1806     le?vperm    $out2,$out2,$out2,$inpperm
 1807     stvx_u      $out1,$x10,$out
 1808     stvx_u      $out2,$x20,$out
 1809     addi        $out,$out,0x30      # 3 times 16 bytes written
 1810     b       Lctr32_enc8x_done       # fixed: was Lcbc_dec8x_done, a copy-paste error branching into the CBC decrypt epilogue
 1811 
 1812 .align  5
 1813 Lctr32_enc8x_two:               # flush 2 blocks; plaintext in in6..in7
 # vcipherlast takes the plaintext block as its round-key operand, which xors
 # the plaintext into the keystream while performing the final AES round.
 1814     vcipherlast $out0,$out0,$in6
 1815     vcipherlast $out1,$out1,$in7
 1816 
 1817     le?vperm    $out0,$out0,$out0,$inpperm
 1818     le?vperm    $out1,$out1,$out1,$inpperm
 1819     stvx_u      $out0,$x00,$out
 1820     stvx_u      $out1,$x10,$out
 1821     addi        $out,$out,0x20      # 2 times 16 bytes written
 1822     b       Lctr32_enc8x_done       # fixed: was Lcbc_dec8x_done, a copy-paste error branching into the CBC decrypt epilogue
 1823 
 1824 .align  5
 1825 Lctr32_enc8x_one:               # flush a single block; plaintext in in7
 1826     vcipherlast $out0,$out0,$in7
 1827 
 1828     le?vperm    $out0,$out0,$out0,$inpperm
 1829     stvx_u      $out0,0,$out
 1830     addi        $out,$out,0x10
 # falls through into the common exit sequence
 1831 
 1832 Lctr32_enc8x_done:              # common exit: scrub secrets from the stack, restore registers
 1833     li      r10,`$FRAME+15`
 1834     li      r11,`$FRAME+31`
 # two pointers walk the save area in alternating 32-byte strides, overwriting
 # all eight 16-byte slots that held round-key copies with a non-secret vector
 1835     stvx        $inpperm,r10,$sp    # wipe copies of round keys
 1836     addi        r10,r10,32
 1837     stvx        $inpperm,r11,$sp
 1838     addi        r11,r11,32
 1839     stvx        $inpperm,r10,$sp
 1840     addi        r10,r10,32
 1841     stvx        $inpperm,r11,$sp
 1842     addi        r11,r11,32
 1843     stvx        $inpperm,r10,$sp
 1844     addi        r10,r10,32
 1845     stvx        $inpperm,r11,$sp
 1846     addi        r11,r11,32
 1847     stvx        $inpperm,r10,$sp
 1848     addi        r10,r10,32
 1849     stvx        $inpperm,r11,$sp
 1850     addi        r11,r11,32
 1851 
 1852     mtspr       256,$vrsave     # restore VRSAVE
 1853     lvx     v20,r10,$sp     # ABI says so
 1854     addi        r10,r10,32
 1855     lvx     v21,r11,$sp
 1856     addi        r11,r11,32
 1857     lvx     v22,r10,$sp
 1858     addi        r10,r10,32
 1859     lvx     v23,r11,$sp
 1860     addi        r11,r11,32
 1861     lvx     v24,r10,$sp
 1862     addi        r10,r10,32
 1863     lvx     v25,r11,$sp
 1864     addi        r11,r11,32
 1865     lvx     v26,r10,$sp
 1866     addi        r10,r10,32
 1867     lvx     v27,r11,$sp
 1868     addi        r11,r11,32
 1869     lvx     v28,r10,$sp
 1870     addi        r10,r10,32
 1871     lvx     v29,r11,$sp
 1872     addi        r11,r11,32
 1873     lvx     v30,r10,$sp
 1874     lvx     v31,r11,$sp
 1875     $POP        r26,`$FRAME+21*16+0*$SIZE_T`($sp)   # restore non-volatile GPRs r26..r31
 1876     $POP        r27,`$FRAME+21*16+1*$SIZE_T`($sp)
 1877     $POP        r28,`$FRAME+21*16+2*$SIZE_T`($sp)
 1878     $POP        r29,`$FRAME+21*16+3*$SIZE_T`($sp)
 1879     $POP        r30,`$FRAME+21*16+4*$SIZE_T`($sp)
 1880     $POP        r31,`$FRAME+21*16+5*$SIZE_T`($sp)
 1881     addi        $sp,$sp,`$FRAME+21*16+6*$SIZE_T`    # pop the whole frame
 1882     blr
 1883     .long       0
 1884     .byte       0,12,0x04,0,0x80,6,6,0      # presumably the traceback tag - confirm against ELF ABI
 1885     .long       0
 1886 .size   .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
 1887 ___
 1888 }}  }}}
 1889 
 1890 my $consts=1;                   # true while still scanning the constants table at the top of $code
 1891 foreach(split("\n",$code)) {
 1892         s/\`([^\`]*)\`/eval($1)/geo;    # expand compile-time arithmetic written in backticks
 1893 
 1894     # constants table endian-specific conversion
 1895     if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {    # table entries carry a trailing marker such as ?rev or ?inv
 1896         my $conv=$3;
 1897         my @bytes=();
 1898 
 1899         # convert to endian-agnostic format
 1900         if ($1 eq "long") {
 1901           foreach (split(/,\s*/,$2)) {
 1902         my $l = /^0/?oct:int;   # leading 0 means octal/hex literal, otherwise decimal
 1903         push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
 1904           }
 1905         } else {
 1906         @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
 1907         }
 1908 
 1909         # little-endian conversion
 1910         if ($flavour =~ /le$/o) {
 1911         SWITCH: for($conv)  {
 1912             /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };   # xor 0xf mirrors 0..15 permutation indices
 1913             /\?rev/ && do   { @bytes=reverse(@bytes);    last; };   # whole-vector byte reversal
 1914         }
 1915         }
 1916 
 1917         #emit
 1918         print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
 1919         next;
 1920     }
 1921     $consts=0 if (m/Lconsts:/o);    # end of table
 1922 
 1923     # instructions prefixed with '?' are endian-specific and need
 1924     # to be adjusted accordingly...
 1925     if ($flavour =~ /le$/o) {   # little-endian
 1926         s/le\?//o       or      # keep le?-tagged lines, dropping the tag
 1927         s/be\?/#be#/o   or      # comment out be?-tagged lines
 1928         s/\?lvsr/lvsl/o or      # lvsr and lvsl swap roles on LE
 1929         s/\?lvsl/lvsr/o or
 1930         s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or    # swap the two permuted source operands
 1931         s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or    # complement shift count: n becomes 16-n
 1932         s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;    # complement word lane: n becomes 3-n
 1933     } else {            # big-endian
 1934         s/le\?/#le#/o   or
 1935         s/be\?//o       or
 1936         s/\?([a-z]+)/$1/o;      # bare ?-prefix is simply dropped
 1937     }
 1938 
 1939         print $_,"\n";
 1940 }
 1941 
 1942 close STDOUT;                   # flush generated code to stdout (close result is not checked)