"Fossies" - the Fresh Open Source Software Archive

Member "openssl-1.0.2q/crypto/aes/asm/aes-ppc.pl" (20 Nov 2018, 39647 Bytes) of package /linux/misc/openssl-1.0.2q.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Perl source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "aes-ppc.pl", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 1.1.0g_vs_1.1.1-pre2.

    1 #!/usr/bin/env perl
    2 
    3 # ====================================================================
    4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
    5 # project. The module is, however, dual licensed under OpenSSL and
    6 # CRYPTOGAMS licenses depending on where you obtain it. For further
    7 # details see http://www.openssl.org/~appro/cryptogams/.
    8 # ====================================================================
    9 
   10 # Needs more work: key setup, CBC routine...
   11 #
   12 # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
   13 # a 128-bit key, which is ~40% better than 64-bit code generated by gcc
   14 # 4.0. But these are not the ones currently used! Their "compact"
   15 # counterparts are, for security reasons. ppc_AES_encrypt_compact runs
   16 # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
   17 # at 1/3 of ppc_AES_decrypt.
   18 
   19 # February 2010
   20 #
   21 # Rescheduling instructions to favour the Power6 pipeline gave a 10%
   22 # performance improvement on the platform in question (and marginal
   23 # improvement even on others). It should be noted that Power6 fails
   24 # to process a byte in 18 cycles, only in 23, because it fails to issue
   25 # 4 load instructions in two cycles, only in 3. As a result, non-compact
   26 # block subroutines are 25% slower than one would expect. Compact
   27 # functions scale better, because they have a purely computational part,
   28 # which scales perfectly with clock frequency. To be specific,
   29 # ppc_AES_encrypt_compact operates at 42 cycles per byte, while
   30 # ppc_AES_decrypt_compact - at 55 (in a 64-bit build).
   31 
   32 $flavour = shift;  # first argument: target flavour; must contain "64" or "32"
   33 # Select the word size, the LR save offset and the matching load/store mnemonics.
   34 if ($flavour =~ /64/) {
   35     $SIZE_T =8;
   36     $LRSAVE =2*$SIZE_T;
   37     $STU    ="stdu";
   38     $POP    ="ld";
   39     $PUSH   ="std";
   40 } elsif ($flavour =~ /32/) {
   41     $SIZE_T =4;
   42     $LRSAVE =$SIZE_T;
   43     $STU    ="stwu";
   44     $POP    ="lwz";
   45     $PUSH   ="stw";
   46 } else { die "nonsense $flavour"; }
   47 # Non-zero (equal to $SIZE_T) when the flavour name ends in "le", otherwise 0.
   48 $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
   49 # Look for ppc-xlate.pl beside this script first, then under ../../perlasm/.
   50 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
   51 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
   52 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
   53 die "can't locate ppc-xlate.pl";
   54 # Pipe STDOUT through ppc-xlate.pl, handing it the next argument; "or" (not "||") so that die tests open's result.
   55 open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
   56 # Stack frame size in bytes: 32 machine words.
   57 $FRAME=32*$SIZE_T;
   58 # Append one "\t.long\tV,V\n" line to $code per argument (each value is written twice); stops at the first undef.
   59 sub _data_word  # no "()" prototype: the sub takes a list, and plain calls must compile too
   60 { my $i;
   61     while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }
   62 }
   63 
   64 # Register allocation. Every name is a plain string that gets interpolated
   65 # into the assembler text generated below.
   66 ($sp,$toc)=("r1","r2");
   67 ($inp,$out,$key)=("r3","r4","r5");
   68 
   69 # Table base pointers. $Tbl0 shares r3 with $inp, and $Tbl3 shares a
   70 # register with $out: stay away from "r2"; $out is offloaded to stack.
   71 ($Tbl0,$Tbl1,$Tbl2,$Tbl3)=("r3","r6","r7",$out);
   72 
   73 # The four 32-bit state words.
   74 ($s0,$s1,$s2,$s3)=map("r$_",(8..11));
   75 
   76 # Temporaries; r0 is used for $t1 in order to stay away from "r13".
   77 ($t0,$t1,$t2,$t3)=("r12","r0","r14","r15");
   78 
   79 # Sixteen accumulators in r16 through r31.
   80 ($acc00,$acc01,$acc02,$acc03,$acc04,$acc05,$acc06,$acc07,
   81  $acc08,$acc09,$acc10,$acc11,$acc12,$acc13,$acc14,$acc15)=map("r$_",(16..31));
   82 
   83 # The 0x80.. and 0x1b.. masks of the compact code reuse two table registers.
   84 ($mask80,$mask1b)=($Tbl2,$Tbl3);
  107 # LAES_Te / LAES_Td: helpers that return a table address in $Tbl0; the caller's LR is kept in r0 and restored.
  108 $code.=<<___;  # "bcl 20,31,\$+4" + "mflr" reads the current code address, "addi" then adds the distance to the data
  109 .machine    "any"
  110 .text
  111 
  112 .align  7
  113 LAES_Te:
  114     mflr    r0
  115     bcl 20,31,\$+4
  116     mflr    $Tbl0   ;    vvvvv "distance" between . and 1st data entry
  117     addi    $Tbl0,$Tbl0,`128-8`
  118     mtlr    r0
  119     blr
  120     .long   0
  121     .byte   0,12,0x14,0,0,0,0,0
  122     .space  `64-9*4`
  123 LAES_Td:
  124     mflr    r0
  125     bcl 20,31,\$+4
  126     mflr    $Tbl0   ;    vvvvvvvv "distance" between . and 1st data entry
  127     addi    $Tbl0,$Tbl0,`128-64-8+2048+256`
  128     mtlr    r0
  129     blr
  130     .long   0
  131     .byte   0,12,0x14,0,0,0,0,0
  132     .space  `128-64-9*4`
  133 ___
  134 &_data_word(
  135     0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
  136     0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
  137     0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
  138     0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
  139     0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
  140     0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
  141     0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
  142     0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
  143     0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
  144     0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
  145     0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
  146     0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
  147     0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
  148     0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
  149     0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
  150     0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
  151     0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
  152     0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
  153     0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
  154     0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
  155     0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
  156     0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
  157     0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
  158     0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
  159     0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
  160     0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
  161     0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
  162     0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
  163     0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
  164     0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
  165     0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
  166     0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
  167     0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
  168     0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
  169     0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
  170     0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
  171     0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
  172     0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
  173     0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
  174     0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
  175     0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
  176     0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
  177     0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
  178     0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
  179     0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
  180     0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
  181     0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
  182     0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
  183     0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
  184     0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
  185     0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
  186     0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
  187     0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
  188     0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
  189     0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
  190     0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
  191     0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
  192     0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
  193     0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
  194     0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
  195     0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
  196     0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
  197     0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
  198     0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
  199 $code.=<<___;
  200 .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
  201 .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
  202 .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
  203 .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
  204 .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
  205 .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
  206 .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
  207 .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
  208 .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
  209 .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
  210 .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
  211 .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
  212 .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
  213 .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
  214 .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
  215 .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
  216 .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
  217 .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
  218 .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
  219 .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
  220 .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
  221 .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
  222 .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
  223 .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
  224 .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
  225 .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
  226 .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
  227 .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
  228 .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
  229 .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
  230 .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
  231 .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
  232 ___
  233 &_data_word(
  234     0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
  235     0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
  236     0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
  237     0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
  238     0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
  239     0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
  240     0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
  241     0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
  242     0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
  243     0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
  244     0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
  245     0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
  246     0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
  247     0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
  248     0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
  249     0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
  250     0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
  251     0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
  252     0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
  253     0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
  254     0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
  255     0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
  256     0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
  257     0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
  258     0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
  259     0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
  260     0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
  261     0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
  262     0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
  263     0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
  264     0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
  265     0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
  266     0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
  267     0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
  268     0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
  269     0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
  270     0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
  271     0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
  272     0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
  273     0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
  274     0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
  275     0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
  276     0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
  277     0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
  278     0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
  279     0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
  280     0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
  281     0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
  282     0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
  283     0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
  284     0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
  285     0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
  286     0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
  287     0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
  288     0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
  289     0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
  290     0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
  291     0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
  292     0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
  293     0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
  294     0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
  295     0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
  296     0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
  297     0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
  298 $code.=<<___;
  299 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
  300 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
  301 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
  302 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
  303 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
  304 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
  305 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
  306 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
  307 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
  308 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
  309 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
  310 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
  311 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
  312 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
  313 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
  314 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
  315 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
  316 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
  317 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
  318 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
  319 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
  320 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
  321 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
  322 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
  323 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
  324 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
  325 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
  326 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
  327 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
  328 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
  329 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
  330 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
  331 
  332 
  333 .globl  .AES_encrypt
  334 .align  7
  335 .AES_encrypt:
  336     $STU    $sp,-$FRAME($sp)
  337     mflr    r0
  338 
  339     $PUSH   $out,`$FRAME-$SIZE_T*19`($sp)
  340     $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
  341     $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
  342     $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
  343     $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
  344     $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
  345     $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
  346     $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
  347     $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
  348     $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
  349     $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
  350     $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
  351     $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
  352     $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
  353     $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
  354     $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
  355     $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
  356     $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
  357     $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
  358     $PUSH   r0,`$FRAME+$LRSAVE`($sp)
  359 
  360     andi.   $t0,$inp,3
  361     andi.   $t1,$out,3
  362     or. $t0,$t0,$t1
  363     bne Lenc_unaligned
  364 
  365 Lenc_unaligned_ok:
  366 ___
  367 $code.=<<___ if (!$LITTLE_ENDIAN);  # big-endian build: load the four input words straight into $s0-$s3
  368     lwz $s0,0($inp)
  369     lwz $s1,4($inp)
  370     lwz $s2,8($inp)
  371     lwz $s3,12($inp)
  372 ___
  373 $code.=<<___ if ($LITTLE_ENDIAN);  # little-endian build: load into $t0-$t3, then byte-reverse each word into $s0-$s3
  374     lwz $t0,0($inp)
  375     lwz $t1,4($inp)
  376     lwz $t2,8($inp)
  377     lwz $t3,12($inp)
  378     rotlwi  $s0,$t0,8
  379     rotlwi  $s1,$t1,8
  380     rotlwi  $s2,$t2,8
  381     rotlwi  $s3,$t3,8
  382     rlwimi  $s0,$t0,24,0,7
  383     rlwimi  $s1,$t1,24,0,7
  384     rlwimi  $s2,$t2,24,0,7
  385     rlwimi  $s3,$t3,24,0,7
  386     rlwimi  $s0,$t0,24,16,23
  387     rlwimi  $s1,$t1,24,16,23
  388     rlwimi  $s2,$t2,24,16,23
  389     rlwimi  $s3,$t3,24,16,23
  390 ___
  391 $code.=<<___;  # get the table address via LAES_Te, run the compact encryption, then reload $out saved on the stack
  392     bl  LAES_Te
  393     bl  Lppc_AES_encrypt_compact
  394     $POP    $out,`$FRAME-$SIZE_T*19`($sp)
  395 ___
  396 $code.=<<___ if ($LITTLE_ENDIAN);  # little-endian build: byte-reverse $s0-$s3 into $t0-$t3 and store those
  397     rotlwi  $t0,$s0,8
  398     rotlwi  $t1,$s1,8
  399     rotlwi  $t2,$s2,8
  400     rotlwi  $t3,$s3,8
  401     rlwimi  $t0,$s0,24,0,7
  402     rlwimi  $t1,$s1,24,0,7
  403     rlwimi  $t2,$s2,24,0,7
  404     rlwimi  $t3,$s3,24,0,7
  405     rlwimi  $t0,$s0,24,16,23
  406     rlwimi  $t1,$s1,24,16,23
  407     rlwimi  $t2,$s2,24,16,23
  408     rlwimi  $t3,$s3,24,16,23
  409     stw $t0,0($out)
  410     stw $t1,4($out)
  411     stw $t2,8($out)
  412     stw $t3,12($out)
  413 ___
  414 $code.=<<___ if (!$LITTLE_ENDIAN);  # big-endian build: store $s0-$s3 as they are
  415     stw $s0,0($out)
  416     stw $s1,4($out)
  417     stw $s2,8($out)
  418     stw $s3,12($out)
  419 ___
  420 $code.=<<___;
  421     b   Lenc_done
  422 
  423 Lenc_unaligned:
  424     subfic  $t0,$inp,4096
  425     subfic  $t1,$out,4096
  426     andi.   $t0,$t0,4096-16
  427     beq Lenc_xpage
  428     andi.   $t1,$t1,4096-16
  429     bne Lenc_unaligned_ok
  430 
  431 Lenc_xpage:
  432     lbz $acc00,0($inp)
  433     lbz $acc01,1($inp)
  434     lbz $acc02,2($inp)
  435     lbz $s0,3($inp)
  436     lbz $acc04,4($inp)
  437     lbz $acc05,5($inp)
  438     lbz $acc06,6($inp)
  439     lbz $s1,7($inp)
  440     lbz $acc08,8($inp)
  441     lbz $acc09,9($inp)
  442     lbz $acc10,10($inp)
  443     insrwi  $s0,$acc00,8,0
  444     lbz $s2,11($inp)
  445     insrwi  $s1,$acc04,8,0
  446     lbz $acc12,12($inp)
  447     insrwi  $s0,$acc01,8,8
  448     lbz $acc13,13($inp)
  449     insrwi  $s1,$acc05,8,8
  450     lbz $acc14,14($inp)
  451     insrwi  $s0,$acc02,8,16
  452     lbz $s3,15($inp)
  453     insrwi  $s1,$acc06,8,16
  454     insrwi  $s2,$acc08,8,0
  455     insrwi  $s3,$acc12,8,0
  456     insrwi  $s2,$acc09,8,8
  457     insrwi  $s3,$acc13,8,8
  458     insrwi  $s2,$acc10,8,16
  459     insrwi  $s3,$acc14,8,16
  460 
  461     bl  LAES_Te
  462     bl  Lppc_AES_encrypt_compact
  463     $POP    $out,`$FRAME-$SIZE_T*19`($sp)
  464 
  465     extrwi  $acc00,$s0,8,0
  466     extrwi  $acc01,$s0,8,8
  467     stb $acc00,0($out)
  468     extrwi  $acc02,$s0,8,16
  469     stb $acc01,1($out)
  470     stb $acc02,2($out)
  471     extrwi  $acc04,$s1,8,0
  472     stb $s0,3($out)
  473     extrwi  $acc05,$s1,8,8
  474     stb $acc04,4($out)
  475     extrwi  $acc06,$s1,8,16
  476     stb $acc05,5($out)
  477     stb $acc06,6($out)
  478     extrwi  $acc08,$s2,8,0
  479     stb $s1,7($out)
  480     extrwi  $acc09,$s2,8,8
  481     stb $acc08,8($out)
  482     extrwi  $acc10,$s2,8,16
  483     stb $acc09,9($out)
  484     stb $acc10,10($out)
  485     extrwi  $acc12,$s3,8,0
  486     stb $s2,11($out)
  487     extrwi  $acc13,$s3,8,8
  488     stb $acc12,12($out)
  489     extrwi  $acc14,$s3,8,16
  490     stb $acc13,13($out)
  491     stb $acc14,14($out)
  492     stb $s3,15($out)
  493 
  494 Lenc_done:
  495     $POP    r0,`$FRAME+$LRSAVE`($sp)
  496     $POP    r14,`$FRAME-$SIZE_T*18`($sp)
  497     $POP    r15,`$FRAME-$SIZE_T*17`($sp)
  498     $POP    r16,`$FRAME-$SIZE_T*16`($sp)
  499     $POP    r17,`$FRAME-$SIZE_T*15`($sp)
  500     $POP    r18,`$FRAME-$SIZE_T*14`($sp)
  501     $POP    r19,`$FRAME-$SIZE_T*13`($sp)
  502     $POP    r20,`$FRAME-$SIZE_T*12`($sp)
  503     $POP    r21,`$FRAME-$SIZE_T*11`($sp)
  504     $POP    r22,`$FRAME-$SIZE_T*10`($sp)
  505     $POP    r23,`$FRAME-$SIZE_T*9`($sp)
  506     $POP    r24,`$FRAME-$SIZE_T*8`($sp)
  507     $POP    r25,`$FRAME-$SIZE_T*7`($sp)
  508     $POP    r26,`$FRAME-$SIZE_T*6`($sp)
  509     $POP    r27,`$FRAME-$SIZE_T*5`($sp)
  510     $POP    r28,`$FRAME-$SIZE_T*4`($sp)
  511     $POP    r29,`$FRAME-$SIZE_T*3`($sp)
  512     $POP    r30,`$FRAME-$SIZE_T*2`($sp)
  513     $POP    r31,`$FRAME-$SIZE_T*1`($sp)
  514     mtlr    r0
  515     addi    $sp,$sp,$FRAME
  516     blr
  517     .long   0
  518     .byte   0,12,4,1,0x80,18,3,0
  519     .long   0
  520 
  521 .align  5
  522 Lppc_AES_encrypt:
  523     lwz $acc00,240($key)
  524     addi    $Tbl1,$Tbl0,3
  525     lwz $t0,0($key)
  526     addi    $Tbl2,$Tbl0,2
  527     lwz $t1,4($key)
  528     addi    $Tbl3,$Tbl0,1
  529     lwz $t2,8($key)
  530     addi    $acc00,$acc00,-1
  531     lwz $t3,12($key)
  532     addi    $key,$key,16
  533     xor $s0,$s0,$t0
  534     xor $s1,$s1,$t1
  535     xor $s2,$s2,$t2
  536     xor $s3,$s3,$t3
  537     mtctr   $acc00
  538 .align  4
  539 Lenc_loop:
  540     rlwinm  $acc00,$s0,`32-24+3`,21,28
  541     rlwinm  $acc01,$s1,`32-24+3`,21,28
  542     rlwinm  $acc02,$s2,`32-24+3`,21,28
  543     rlwinm  $acc03,$s3,`32-24+3`,21,28
  544     lwz $t0,0($key)
  545     rlwinm  $acc04,$s1,`32-16+3`,21,28
  546     lwz $t1,4($key)
  547     rlwinm  $acc05,$s2,`32-16+3`,21,28
  548     lwz $t2,8($key)
  549     rlwinm  $acc06,$s3,`32-16+3`,21,28
  550     lwz $t3,12($key)
  551     rlwinm  $acc07,$s0,`32-16+3`,21,28
  552     lwzx    $acc00,$Tbl0,$acc00
  553     rlwinm  $acc08,$s2,`32-8+3`,21,28
  554     lwzx    $acc01,$Tbl0,$acc01
  555     rlwinm  $acc09,$s3,`32-8+3`,21,28
  556     lwzx    $acc02,$Tbl0,$acc02
  557     rlwinm  $acc10,$s0,`32-8+3`,21,28
  558     lwzx    $acc03,$Tbl0,$acc03
  559     rlwinm  $acc11,$s1,`32-8+3`,21,28
  560     lwzx    $acc04,$Tbl1,$acc04
  561     rlwinm  $acc12,$s3,`0+3`,21,28
  562     lwzx    $acc05,$Tbl1,$acc05
  563     rlwinm  $acc13,$s0,`0+3`,21,28
  564     lwzx    $acc06,$Tbl1,$acc06
  565     rlwinm  $acc14,$s1,`0+3`,21,28
  566     lwzx    $acc07,$Tbl1,$acc07
  567     rlwinm  $acc15,$s2,`0+3`,21,28
  568     lwzx    $acc08,$Tbl2,$acc08
  569     xor $t0,$t0,$acc00
  570     lwzx    $acc09,$Tbl2,$acc09
  571     xor $t1,$t1,$acc01
  572     lwzx    $acc10,$Tbl2,$acc10
  573     xor $t2,$t2,$acc02
  574     lwzx    $acc11,$Tbl2,$acc11
  575     xor $t3,$t3,$acc03
  576     lwzx    $acc12,$Tbl3,$acc12
  577     xor $t0,$t0,$acc04
  578     lwzx    $acc13,$Tbl3,$acc13
  579     xor $t1,$t1,$acc05
  580     lwzx    $acc14,$Tbl3,$acc14
  581     xor $t2,$t2,$acc06
  582     lwzx    $acc15,$Tbl3,$acc15
  583     xor $t3,$t3,$acc07
  584     xor $t0,$t0,$acc08
  585     xor $t1,$t1,$acc09
  586     xor $t2,$t2,$acc10
  587     xor $t3,$t3,$acc11
  588     xor $s0,$t0,$acc12
  589     xor $s1,$t1,$acc13
  590     xor $s2,$t2,$acc14
  591     xor $s3,$t3,$acc15
  592     addi    $key,$key,16
  593     bdnz    Lenc_loop
  594 
  595     addi    $Tbl2,$Tbl0,2048
  596     nop
  597     lwz $t0,0($key)
  598     rlwinm  $acc00,$s0,`32-24`,24,31
  599     lwz $t1,4($key)
  600     rlwinm  $acc01,$s1,`32-24`,24,31
  601     lwz $t2,8($key)
  602     rlwinm  $acc02,$s2,`32-24`,24,31
  603     lwz $t3,12($key)
  604     rlwinm  $acc03,$s3,`32-24`,24,31
  605     lwz $acc08,`2048+0`($Tbl0)  ! prefetch Te4
  606     rlwinm  $acc04,$s1,`32-16`,24,31
  607     lwz $acc09,`2048+32`($Tbl0)
  608     rlwinm  $acc05,$s2,`32-16`,24,31
  609     lwz $acc10,`2048+64`($Tbl0)
  610     rlwinm  $acc06,$s3,`32-16`,24,31
  611     lwz $acc11,`2048+96`($Tbl0)
  612     rlwinm  $acc07,$s0,`32-16`,24,31
  613     lwz $acc12,`2048+128`($Tbl0)
  614     rlwinm  $acc08,$s2,`32-8`,24,31
  615     lwz $acc13,`2048+160`($Tbl0)
  616     rlwinm  $acc09,$s3,`32-8`,24,31
  617     lwz $acc14,`2048+192`($Tbl0)
  618     rlwinm  $acc10,$s0,`32-8`,24,31
  619     lwz $acc15,`2048+224`($Tbl0)
  620     rlwinm  $acc11,$s1,`32-8`,24,31
  621     lbzx    $acc00,$Tbl2,$acc00
  622     rlwinm  $acc12,$s3,`0`,24,31
  623     lbzx    $acc01,$Tbl2,$acc01
  624     rlwinm  $acc13,$s0,`0`,24,31
  625     lbzx    $acc02,$Tbl2,$acc02
  626     rlwinm  $acc14,$s1,`0`,24,31
  627     lbzx    $acc03,$Tbl2,$acc03
  628     rlwinm  $acc15,$s2,`0`,24,31
  629     lbzx    $acc04,$Tbl2,$acc04
  630     rlwinm  $s0,$acc00,24,0,7
  631     lbzx    $acc05,$Tbl2,$acc05
  632     rlwinm  $s1,$acc01,24,0,7
  633     lbzx    $acc06,$Tbl2,$acc06
  634     rlwinm  $s2,$acc02,24,0,7
  635     lbzx    $acc07,$Tbl2,$acc07
  636     rlwinm  $s3,$acc03,24,0,7
  637     lbzx    $acc08,$Tbl2,$acc08
  638     rlwimi  $s0,$acc04,16,8,15
  639     lbzx    $acc09,$Tbl2,$acc09
  640     rlwimi  $s1,$acc05,16,8,15
  641     lbzx    $acc10,$Tbl2,$acc10
  642     rlwimi  $s2,$acc06,16,8,15
  643     lbzx    $acc11,$Tbl2,$acc11
  644     rlwimi  $s3,$acc07,16,8,15
  645     lbzx    $acc12,$Tbl2,$acc12
  646     rlwimi  $s0,$acc08,8,16,23
  647     lbzx    $acc13,$Tbl2,$acc13
  648     rlwimi  $s1,$acc09,8,16,23
  649     lbzx    $acc14,$Tbl2,$acc14
  650     rlwimi  $s2,$acc10,8,16,23
  651     lbzx    $acc15,$Tbl2,$acc15
  652     rlwimi  $s3,$acc11,8,16,23
  653     or  $s0,$s0,$acc12
  654     or  $s1,$s1,$acc13
  655     or  $s2,$s2,$acc14
  656     or  $s3,$s3,$acc15
  657     xor $s0,$s0,$t0
  658     xor $s1,$s1,$t1
  659     xor $s2,$s2,$t2
  660     xor $s3,$s3,$t3
  661     blr
  662     .long   0
  663     .byte   0,12,0x14,0,0,0,0,0
  664 
  665 .align  4
  666 Lppc_AES_encrypt_compact:
  667     lwz $acc00,240($key)
  668     addi    $Tbl1,$Tbl0,2048
  669     lwz $t0,0($key)
  670     lis $mask80,0x8080
  671     lwz $t1,4($key)
  672     lis $mask1b,0x1b1b
  673     lwz $t2,8($key)
  674     ori $mask80,$mask80,0x8080
  675     lwz $t3,12($key)
  676     ori $mask1b,$mask1b,0x1b1b
  677     addi    $key,$key,16
  678     mtctr   $acc00
  679 .align  4
  680 Lenc_compact_loop:
  681     xor $s0,$s0,$t0
  682     xor $s1,$s1,$t1
  683     rlwinm  $acc00,$s0,`32-24`,24,31
  684     xor $s2,$s2,$t2
  685     rlwinm  $acc01,$s1,`32-24`,24,31
  686     xor $s3,$s3,$t3
  687     rlwinm  $acc02,$s2,`32-24`,24,31
  688     rlwinm  $acc03,$s3,`32-24`,24,31
  689     rlwinm  $acc04,$s1,`32-16`,24,31
  690     rlwinm  $acc05,$s2,`32-16`,24,31
  691     rlwinm  $acc06,$s3,`32-16`,24,31
  692     rlwinm  $acc07,$s0,`32-16`,24,31
  693     lbzx    $acc00,$Tbl1,$acc00
  694     rlwinm  $acc08,$s2,`32-8`,24,31
  695     lbzx    $acc01,$Tbl1,$acc01
  696     rlwinm  $acc09,$s3,`32-8`,24,31
  697     lbzx    $acc02,$Tbl1,$acc02
  698     rlwinm  $acc10,$s0,`32-8`,24,31
  699     lbzx    $acc03,$Tbl1,$acc03
  700     rlwinm  $acc11,$s1,`32-8`,24,31
  701     lbzx    $acc04,$Tbl1,$acc04
  702     rlwinm  $acc12,$s3,`0`,24,31
  703     lbzx    $acc05,$Tbl1,$acc05
  704     rlwinm  $acc13,$s0,`0`,24,31
  705     lbzx    $acc06,$Tbl1,$acc06
  706     rlwinm  $acc14,$s1,`0`,24,31
  707     lbzx    $acc07,$Tbl1,$acc07
  708     rlwinm  $acc15,$s2,`0`,24,31
  709     lbzx    $acc08,$Tbl1,$acc08
  710     rlwinm  $s0,$acc00,24,0,7
  711     lbzx    $acc09,$Tbl1,$acc09
  712     rlwinm  $s1,$acc01,24,0,7
  713     lbzx    $acc10,$Tbl1,$acc10
  714     rlwinm  $s2,$acc02,24,0,7
  715     lbzx    $acc11,$Tbl1,$acc11
  716     rlwinm  $s3,$acc03,24,0,7
  717     lbzx    $acc12,$Tbl1,$acc12
  718     rlwimi  $s0,$acc04,16,8,15
  719     lbzx    $acc13,$Tbl1,$acc13
  720     rlwimi  $s1,$acc05,16,8,15
  721     lbzx    $acc14,$Tbl1,$acc14
  722     rlwimi  $s2,$acc06,16,8,15
  723     lbzx    $acc15,$Tbl1,$acc15
  724     rlwimi  $s3,$acc07,16,8,15
  725     rlwimi  $s0,$acc08,8,16,23
  726     rlwimi  $s1,$acc09,8,16,23
  727     rlwimi  $s2,$acc10,8,16,23
  728     rlwimi  $s3,$acc11,8,16,23
  729     lwz $t0,0($key)
  730     or  $s0,$s0,$acc12
  731     lwz $t1,4($key)
  732     or  $s1,$s1,$acc13
  733     lwz $t2,8($key)
  734     or  $s2,$s2,$acc14
  735     lwz $t3,12($key)
  736     or  $s3,$s3,$acc15
  737 
  738     addi    $key,$key,16
  739     bdz Lenc_compact_done
  740 
  741     and $acc00,$s0,$mask80  # r1=r0&0x80808080
  742     and $acc01,$s1,$mask80
  743     and $acc02,$s2,$mask80
  744     and $acc03,$s3,$mask80
  745     srwi    $acc04,$acc00,7     # r1>>7
  746     andc    $acc08,$s0,$mask80  # r0&0x7f7f7f7f
  747     srwi    $acc05,$acc01,7
  748     andc    $acc09,$s1,$mask80
  749     srwi    $acc06,$acc02,7
  750     andc    $acc10,$s2,$mask80
  751     srwi    $acc07,$acc03,7
  752     andc    $acc11,$s3,$mask80
  753     sub $acc00,$acc00,$acc04    # r1-(r1>>7)
  754     sub $acc01,$acc01,$acc05
  755     sub $acc02,$acc02,$acc06
  756     sub $acc03,$acc03,$acc07
  757     add $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
  758     add $acc09,$acc09,$acc09
  759     add $acc10,$acc10,$acc10
  760     add $acc11,$acc11,$acc11
  761     and $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
  762     and $acc01,$acc01,$mask1b
  763     and $acc02,$acc02,$mask1b
  764     and $acc03,$acc03,$mask1b
  765     xor $acc00,$acc00,$acc08    # r2
  766     xor $acc01,$acc01,$acc09
  767      rotlwi $acc12,$s0,16       # ROTATE(r0,16)
  768     xor $acc02,$acc02,$acc10
  769      rotlwi $acc13,$s1,16
  770     xor $acc03,$acc03,$acc11
  771      rotlwi $acc14,$s2,16
  772 
  773     xor $s0,$s0,$acc00      # r0^r2
  774     rotlwi  $acc15,$s3,16
  775     xor $s1,$s1,$acc01
  776     rotrwi  $s0,$s0,24      # ROTATE(r2^r0,24)
  777     xor $s2,$s2,$acc02
  778     rotrwi  $s1,$s1,24
  779     xor $s3,$s3,$acc03
  780     rotrwi  $s2,$s2,24
  781     xor $s0,$s0,$acc00      # ROTATE(r2^r0,24)^r2
  782     rotrwi  $s3,$s3,24
  783     xor $s1,$s1,$acc01
  784     xor $s2,$s2,$acc02
  785     xor $s3,$s3,$acc03
  786     rotlwi  $acc08,$acc12,8     # ROTATE(r0,24)
  787     xor $s0,$s0,$acc12      #
  788     rotlwi  $acc09,$acc13,8
  789     xor $s1,$s1,$acc13
  790     rotlwi  $acc10,$acc14,8
  791     xor $s2,$s2,$acc14
  792     rotlwi  $acc11,$acc15,8
  793     xor $s3,$s3,$acc15
  794     xor $s0,$s0,$acc08      #
  795     xor $s1,$s1,$acc09
  796     xor $s2,$s2,$acc10
  797     xor $s3,$s3,$acc11
  798 
  799     b   Lenc_compact_loop
  800 .align  4
  801 Lenc_compact_done:
  802     xor $s0,$s0,$t0
  803     xor $s1,$s1,$t1
  804     xor $s2,$s2,$t2
  805     xor $s3,$s3,$t3
  806     blr
  807     .long   0
  808     .byte   0,12,0x14,0,0,0,0,0
  809 .size   .AES_encrypt,.-.AES_encrypt
  810 
  811 .globl  .AES_decrypt
  812 .align  7
  813 .AES_decrypt:
      # Public entry point: decrypt one 16-byte block.  The block is read
      # from inp and the result written to out; the key register is not
      # touched here and is consumed by the core routine.  The cipher work
      # itself is delegated to Lppc_AES_decrypt_compact; this wrapper only
      # handles the stack frame, register save/restore and getting the 16
      # bytes in and out of s0..s3.
      #
      # STU, PUSH and POP are macros defined earlier in the file
      # (presumably the store-with-update, store and load forms matching
      # SIZE_T -- confirm against the top of the file).
  814     $STU    $sp,-$FRAME($sp)
  815     mflr    r0
  816 
          # Save the out pointer (it is reloaded after the core call),
          # r14-r31 and finally the return address held in r0.
  817     $PUSH   $out,`$FRAME-$SIZE_T*19`($sp)
  818     $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
  819     $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
  820     $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
  821     $PUSH   r17,`$FRAME-$SIZE_T*15`($sp)
  822     $PUSH   r18,`$FRAME-$SIZE_T*14`($sp)
  823     $PUSH   r19,`$FRAME-$SIZE_T*13`($sp)
  824     $PUSH   r20,`$FRAME-$SIZE_T*12`($sp)
  825     $PUSH   r21,`$FRAME-$SIZE_T*11`($sp)
  826     $PUSH   r22,`$FRAME-$SIZE_T*10`($sp)
  827     $PUSH   r23,`$FRAME-$SIZE_T*9`($sp)
  828     $PUSH   r24,`$FRAME-$SIZE_T*8`($sp)
  829     $PUSH   r25,`$FRAME-$SIZE_T*7`($sp)
  830     $PUSH   r26,`$FRAME-$SIZE_T*6`($sp)
  831     $PUSH   r27,`$FRAME-$SIZE_T*5`($sp)
  832     $PUSH   r28,`$FRAME-$SIZE_T*4`($sp)
  833     $PUSH   r29,`$FRAME-$SIZE_T*3`($sp)
  834     $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
  835     $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
  836     $PUSH   r0,`$FRAME+$LRSAVE`($sp)
  837 
          # If either pointer is not 4-byte aligned, decide at
          # Ldec_unaligned whether word accesses are still acceptable.
  838     andi.   $t0,$inp,3
  839     andi.   $t1,$out,3
  840     or. $t0,$t0,$t1
  841     bne Ldec_unaligned
  842 
      # Word-at-a-time path; also re-entered from Ldec_unaligned when the
      # block cannot straddle a 4096-byte page.
  843 Ldec_unaligned_ok:
  844 ___
      # Big-endian build: the four input words can be used exactly as loaded.
  845 $code.=<<___ if (!$LITTLE_ENDIAN);
  846     lwz $s0,0($inp)
  847     lwz $s1,4($inp)
  848     lwz $s2,8($inp)
  849     lwz $s3,12($inp)
  850 ___
      # Little-endian build: load into temporaries, then reverse the byte
      # order of every word (rotate left by 8, then patch bytes 0 and 2 from
      # the value rotated by 24) so that s0..s3 hold the big-endian view.
  851 $code.=<<___ if ($LITTLE_ENDIAN);
  852     lwz $t0,0($inp)
  853     lwz $t1,4($inp)
  854     lwz $t2,8($inp)
  855     lwz $t3,12($inp)
  856     rotlwi  $s0,$t0,8
  857     rotlwi  $s1,$t1,8
  858     rotlwi  $s2,$t2,8
  859     rotlwi  $s3,$t3,8
  860     rlwimi  $s0,$t0,24,0,7
  861     rlwimi  $s1,$t1,24,0,7
  862     rlwimi  $s2,$t2,24,0,7
  863     rlwimi  $s3,$t3,24,0,7
  864     rlwimi  $s0,$t0,24,16,23
  865     rlwimi  $s1,$t1,24,16,23
  866     rlwimi  $s2,$t2,24,16,23
  867     rlwimi  $s3,$t3,24,16,23
  868 ___
  869 $code.=<<___;
          # LAES_Td is defined outside this part of the file; presumably it
          # leaves the decryption table address in Tbl0, which the core
          # routine indexes -- confirm.  The out pointer is reloaded from
          # its stack slot once the core routine returns.
  870     bl  LAES_Td
  871     bl  Lppc_AES_decrypt_compact
  872     $POP    $out,`$FRAME-$SIZE_T*19`($sp)
  873 ___
      # Little-endian build: byte-reverse each result word (same sequence as
      # on input) before storing it.
  874 $code.=<<___ if ($LITTLE_ENDIAN);
  875     rotlwi  $t0,$s0,8
  876     rotlwi  $t1,$s1,8
  877     rotlwi  $t2,$s2,8
  878     rotlwi  $t3,$s3,8
  879     rlwimi  $t0,$s0,24,0,7
  880     rlwimi  $t1,$s1,24,0,7
  881     rlwimi  $t2,$s2,24,0,7
  882     rlwimi  $t3,$s3,24,0,7
  883     rlwimi  $t0,$s0,24,16,23
  884     rlwimi  $t1,$s1,24,16,23
  885     rlwimi  $t2,$s2,24,16,23
  886     rlwimi  $t3,$s3,24,16,23
  887     stw $t0,0($out)
  888     stw $t1,4($out)
  889     stw $t2,8($out)
  890     stw $t3,12($out)
  891 ___
      # Big-endian build: store the result words directly.
  892 $code.=<<___ if (!$LITTLE_ENDIAN);
  893     stw $s0,0($out)
  894     stw $s1,4($out)
  895     stw $s2,8($out)
  896     stw $s3,12($out)
  897 ___
  898 $code.=<<___;
  899     b   Ldec_done
  900 
      # At least one pointer is misaligned.  Compute 4096-ptr for both
      # pointers; if bits 4..11 of that value are all zero, fewer than 16
      # bytes remain before the next 4096-byte boundary (or the pointer
      # sits exactly on one), so a 16-byte block may cross a page.  Word
      # accesses are used only when neither buffer is in that situation;
      # otherwise fall through to the byte-wise path.
  901 Ldec_unaligned:
  902     subfic  $t0,$inp,4096
  903     subfic  $t1,$out,4096
  904     andi.   $t0,$t0,4096-16
  905     beq Ldec_xpage
  906     andi.   $t1,$t1,4096-16
  907     bne Ldec_unaligned_ok
  908 
      # Byte-wise path: load 16 single bytes and assemble four words with
      # the lowest-addressed byte of each group in the most significant
      # position (the fourth byte is loaded straight into the low end of
      # s0..s3, the other three are inserted above it).
  909 Ldec_xpage:
  910     lbz $acc00,0($inp)
  911     lbz $acc01,1($inp)
  912     lbz $acc02,2($inp)
  913     lbz $s0,3($inp)
  914     lbz $acc04,4($inp)
  915     lbz $acc05,5($inp)
  916     lbz $acc06,6($inp)
  917     lbz $s1,7($inp)
  918     lbz $acc08,8($inp)
  919     lbz $acc09,9($inp)
  920     lbz $acc10,10($inp)
  921     insrwi  $s0,$acc00,8,0
  922     lbz $s2,11($inp)
  923     insrwi  $s1,$acc04,8,0
  924     lbz $acc12,12($inp)
  925     insrwi  $s0,$acc01,8,8
  926     lbz $acc13,13($inp)
  927     insrwi  $s1,$acc05,8,8
  928     lbz $acc14,14($inp)
  929     insrwi  $s0,$acc02,8,16
  930     lbz $s3,15($inp)
  931     insrwi  $s1,$acc06,8,16
  932     insrwi  $s2,$acc08,8,0
  933     insrwi  $s3,$acc12,8,0
  934     insrwi  $s2,$acc09,8,8
  935     insrwi  $s3,$acc13,8,8
  936     insrwi  $s2,$acc10,8,16
  937     insrwi  $s3,$acc14,8,16
  938 
          # Same core call sequence as on the word path.
  939     bl  LAES_Td
  940     bl  Lppc_AES_decrypt_compact
  941     $POP    $out,`$FRAME-$SIZE_T*19`($sp)
  942 
          # Scatter the result one byte at a time: the three upper bytes of
          # each word are extracted into scratch registers, the lowest byte
          # is stored directly from s0..s3.
  943     extrwi  $acc00,$s0,8,0
  944     extrwi  $acc01,$s0,8,8
  945     stb $acc00,0($out)
  946     extrwi  $acc02,$s0,8,16
  947     stb $acc01,1($out)
  948     stb $acc02,2($out)
  949     extrwi  $acc04,$s1,8,0
  950     stb $s0,3($out)
  951     extrwi  $acc05,$s1,8,8
  952     stb $acc04,4($out)
  953     extrwi  $acc06,$s1,8,16
  954     stb $acc05,5($out)
  955     stb $acc06,6($out)
  956     extrwi  $acc08,$s2,8,0
  957     stb $s1,7($out)
  958     extrwi  $acc09,$s2,8,8
  959     stb $acc08,8($out)
  960     extrwi  $acc10,$s2,8,16
  961     stb $acc09,9($out)
  962     stb $acc10,10($out)
  963     extrwi  $acc12,$s3,8,0
  964     stb $s2,11($out)
  965     extrwi  $acc13,$s3,8,8
  966     stb $acc12,12($out)
  967     extrwi  $acc14,$s3,8,16
  968     stb $acc13,13($out)
  969     stb $acc14,14($out)
  970     stb $s3,15($out)
  971 
      # Common epilogue: reload the return address and r14-r31 from the
      # slots written in the prologue, release the frame and return.
  972 Ldec_done:
  973     $POP    r0,`$FRAME+$LRSAVE`($sp)
  974     $POP    r14,`$FRAME-$SIZE_T*18`($sp)
  975     $POP    r15,`$FRAME-$SIZE_T*17`($sp)
  976     $POP    r16,`$FRAME-$SIZE_T*16`($sp)
  977     $POP    r17,`$FRAME-$SIZE_T*15`($sp)
  978     $POP    r18,`$FRAME-$SIZE_T*14`($sp)
  979     $POP    r19,`$FRAME-$SIZE_T*13`($sp)
  980     $POP    r20,`$FRAME-$SIZE_T*12`($sp)
  981     $POP    r21,`$FRAME-$SIZE_T*11`($sp)
  982     $POP    r22,`$FRAME-$SIZE_T*10`($sp)
  983     $POP    r23,`$FRAME-$SIZE_T*9`($sp)
  984     $POP    r24,`$FRAME-$SIZE_T*8`($sp)
  985     $POP    r25,`$FRAME-$SIZE_T*7`($sp)
  986     $POP    r26,`$FRAME-$SIZE_T*6`($sp)
  987     $POP    r27,`$FRAME-$SIZE_T*5`($sp)
  988     $POP    r28,`$FRAME-$SIZE_T*4`($sp)
  989     $POP    r29,`$FRAME-$SIZE_T*3`($sp)
  990     $POP    r30,`$FRAME-$SIZE_T*2`($sp)
  991     $POP    r31,`$FRAME-$SIZE_T*1`($sp)
  992     mtlr    r0
  993     addi    $sp,$sp,$FRAME
  994     blr
          # NOTE(review): the words below look like a traceback-table style
          # function trailer (the 18 would match the eighteen registers
          # r14-r31 saved above) -- confirm against the target ABI.
  995     .long   0
  996     .byte   0,12,4,1,0x80,18,3,0
  997     .long   0
  998 
  999 .align  5
 1000 Lppc_AES_decrypt:
      # Table-driven decryption core.  On entry the state is in s0..s3, the
      # key schedule is addressed by key and the table base is in Tbl0; on
      # return s0..s3 hold the result.  Within the visible part of the file
      # AES_decrypt calls the compact routine, not this one.
      #
      # The word at byte offset 240 of the key schedule is used as the round
      # count: the loop below runs that count minus one times and the last
      # round is handled separately after it.
      # Tbl1..Tbl3 address the same table at byte offsets 3, 2 and 1.
      # NOTE(review): together with the 8-byte index stride used in the
      # loop this presumably yields byte-rotated views of each table
      # entry -- the table layout is defined outside this chunk, confirm.
 1001     lwz $acc00,240($key)
 1002     addi    $Tbl1,$Tbl0,3
 1003     lwz $t0,0($key)
 1004     addi    $Tbl2,$Tbl0,2
 1005     lwz $t1,4($key)
 1006     addi    $Tbl3,$Tbl0,1
 1007     lwz $t2,8($key)
 1008     addi    $acc00,$acc00,-1
 1009     lwz $t3,12($key)
 1010     addi    $key,$key,16
          # Initial round-key addition, then set up the loop counter.
 1011     xor $s0,$s0,$t0
 1012     xor $s1,$s1,$t1
 1013     xor $s2,$s2,$t2
 1014     xor $s3,$s3,$t3
 1015     mtctr   $acc00
 1016 .align  4
      # One full round per iteration.  Each rlwinm isolates one state byte
      # and scales it by 8 (mask bits 21..28) to form a table offset.  The
      # source words are taken in the order s0,s1,s2,s3 / s3,s0,s1,s2 /
      # s2,s3,s0,s1 / s1,s2,s3,s0 for the four byte positions, which is
      # where the row rotation of the cipher is folded in.  The sixteen
      # table words are then XORed into the next round key (t0..t3) to
      # form the new state.
 1017 Ldec_loop:
 1018     rlwinm  $acc00,$s0,`32-24+3`,21,28
 1019     rlwinm  $acc01,$s1,`32-24+3`,21,28
 1020     rlwinm  $acc02,$s2,`32-24+3`,21,28
 1021     rlwinm  $acc03,$s3,`32-24+3`,21,28
 1022     lwz $t0,0($key)
 1023     rlwinm  $acc04,$s3,`32-16+3`,21,28
 1024     lwz $t1,4($key)
 1025     rlwinm  $acc05,$s0,`32-16+3`,21,28
 1026     lwz $t2,8($key)
 1027     rlwinm  $acc06,$s1,`32-16+3`,21,28
 1028     lwz $t3,12($key)
 1029     rlwinm  $acc07,$s2,`32-16+3`,21,28
 1030     lwzx    $acc00,$Tbl0,$acc00
 1031     rlwinm  $acc08,$s2,`32-8+3`,21,28
 1032     lwzx    $acc01,$Tbl0,$acc01
 1033     rlwinm  $acc09,$s3,`32-8+3`,21,28
 1034     lwzx    $acc02,$Tbl0,$acc02
 1035     rlwinm  $acc10,$s0,`32-8+3`,21,28
 1036     lwzx    $acc03,$Tbl0,$acc03
 1037     rlwinm  $acc11,$s1,`32-8+3`,21,28
 1038     lwzx    $acc04,$Tbl1,$acc04
 1039     rlwinm  $acc12,$s1,`0+3`,21,28
 1040     lwzx    $acc05,$Tbl1,$acc05
 1041     rlwinm  $acc13,$s2,`0+3`,21,28
 1042     lwzx    $acc06,$Tbl1,$acc06
 1043     rlwinm  $acc14,$s3,`0+3`,21,28
 1044     lwzx    $acc07,$Tbl1,$acc07
 1045     rlwinm  $acc15,$s0,`0+3`,21,28
 1046     lwzx    $acc08,$Tbl2,$acc08
 1047     xor $t0,$t0,$acc00
 1048     lwzx    $acc09,$Tbl2,$acc09
 1049     xor $t1,$t1,$acc01
 1050     lwzx    $acc10,$Tbl2,$acc10
 1051     xor $t2,$t2,$acc02
 1052     lwzx    $acc11,$Tbl2,$acc11
 1053     xor $t3,$t3,$acc03
 1054     lwzx    $acc12,$Tbl3,$acc12
 1055     xor $t0,$t0,$acc04
 1056     lwzx    $acc13,$Tbl3,$acc13
 1057     xor $t1,$t1,$acc05
 1058     lwzx    $acc14,$Tbl3,$acc14
 1059     xor $t2,$t2,$acc06
 1060     lwzx    $acc15,$Tbl3,$acc15
 1061     xor $t3,$t3,$acc07
 1062     xor $t0,$t0,$acc08
 1063     xor $t1,$t1,$acc09
 1064     xor $t2,$t2,$acc10
 1065     xor $t3,$t3,$acc11
 1066     xor $s0,$t0,$acc12
 1067     xor $s1,$t1,$acc13
 1068     xor $s2,$t2,$acc14
 1069     xor $s3,$t3,$acc15
 1070     addi    $key,$key,16
 1071     bdnz    Ldec_loop
 1072 
          # Final round: Tbl2 is repointed at the byte table located 2048
          # bytes past Tbl0 (called Td4 in the existing note below) and
          # plain byte lookups replace the word lookups.  The eight lwz
          # from 2048+0 .. 2048+224 only touch the table at 32-byte
          # intervals; their results in acc08..acc15 are overwritten before
          # being used.
 1073     addi    $Tbl2,$Tbl0,2048
 1074     nop
 1075     lwz $t0,0($key)
 1076     rlwinm  $acc00,$s0,`32-24`,24,31
 1077     lwz $t1,4($key)
 1078     rlwinm  $acc01,$s1,`32-24`,24,31
 1079     lwz $t2,8($key)
 1080     rlwinm  $acc02,$s2,`32-24`,24,31
 1081     lwz $t3,12($key)
 1082     rlwinm  $acc03,$s3,`32-24`,24,31
 1083     lwz $acc08,`2048+0`($Tbl0)  ! prefetch Td4
 1084     rlwinm  $acc04,$s3,`32-16`,24,31
 1085     lwz $acc09,`2048+32`($Tbl0)
 1086     rlwinm  $acc05,$s0,`32-16`,24,31
 1087     lwz $acc10,`2048+64`($Tbl0)
 1088     lbzx    $acc00,$Tbl2,$acc00
 1089     lwz $acc11,`2048+96`($Tbl0)
 1090     lbzx    $acc01,$Tbl2,$acc01
 1091     lwz $acc12,`2048+128`($Tbl0)
 1092     rlwinm  $acc06,$s1,`32-16`,24,31
 1093     lwz $acc13,`2048+160`($Tbl0)
 1094     rlwinm  $acc07,$s2,`32-16`,24,31
 1095     lwz $acc14,`2048+192`($Tbl0)
 1096     rlwinm  $acc08,$s2,`32-8`,24,31
 1097     lwz $acc15,`2048+224`($Tbl0)
 1098     rlwinm  $acc09,$s3,`32-8`,24,31
 1099     lbzx    $acc02,$Tbl2,$acc02
 1100     rlwinm  $acc10,$s0,`32-8`,24,31
 1101     lbzx    $acc03,$Tbl2,$acc03
 1102     rlwinm  $acc11,$s1,`32-8`,24,31
 1103     lbzx    $acc04,$Tbl2,$acc04
 1104     rlwinm  $acc12,$s1,`0`,24,31
 1105     lbzx    $acc05,$Tbl2,$acc05
 1106     rlwinm  $acc13,$s2,`0`,24,31
 1107     lbzx    $acc06,$Tbl2,$acc06
 1108     rlwinm  $acc14,$s3,`0`,24,31
 1109     lbzx    $acc07,$Tbl2,$acc07
 1110     rlwinm  $acc15,$s0,`0`,24,31
          # Reassemble each state word from its four substituted bytes:
          # top byte via rlwinm, the two middle bytes via rlwimi, the low
          # byte via or.
 1111     lbzx    $acc08,$Tbl2,$acc08
 1112     rlwinm  $s0,$acc00,24,0,7
 1113     lbzx    $acc09,$Tbl2,$acc09
 1114     rlwinm  $s1,$acc01,24,0,7
 1115     lbzx    $acc10,$Tbl2,$acc10
 1116     rlwinm  $s2,$acc02,24,0,7
 1117     lbzx    $acc11,$Tbl2,$acc11
 1118     rlwinm  $s3,$acc03,24,0,7
 1119     lbzx    $acc12,$Tbl2,$acc12
 1120     rlwimi  $s0,$acc04,16,8,15
 1121     lbzx    $acc13,$Tbl2,$acc13
 1122     rlwimi  $s1,$acc05,16,8,15
 1123     lbzx    $acc14,$Tbl2,$acc14
 1124     rlwimi  $s2,$acc06,16,8,15
 1125     lbzx    $acc15,$Tbl2,$acc15
 1126     rlwimi  $s3,$acc07,16,8,15
 1127     rlwimi  $s0,$acc08,8,16,23
 1128     rlwimi  $s1,$acc09,8,16,23
 1129     rlwimi  $s2,$acc10,8,16,23
 1130     rlwimi  $s3,$acc11,8,16,23
 1131     or  $s0,$s0,$acc12
 1132     or  $s1,$s1,$acc13
 1133     or  $s2,$s2,$acc14
 1134     or  $s3,$s3,$acc15
          # Last round-key addition.
 1135     xor $s0,$s0,$t0
 1136     xor $s1,$s1,$t1
 1137     xor $s2,$s2,$t2
 1138     xor $s3,$s3,$t3
 1139     blr
          # NOTE(review): function trailer words, same form as the one that
          # follows the other internal routines -- confirm meaning against
          # the target ABI.
 1140     .long   0
 1141     .byte   0,12,0x14,0,0,0,0,0
 1142 
 1143 .align  4
 1144 Lppc_AES_decrypt_compact:
      # Compact decryption core, the routine AES_decrypt actually calls.
      # It uses only the 256-entry byte table located 2048 bytes past Tbl0
      # and computes the column mixing arithmetically instead of through
      # word tables.  On entry the state is in s0..s3, the key schedule is
      # addressed by key and the table base is in Tbl0; on return s0..s3
      # hold the result.
      #
      # The word at byte offset 240 of the key schedule is the loop count.
      # mask80 and mask1b are built as 0x80808080 and 0x1b1b1b1b for the
      # packed doubling steps further down.
 1145     lwz $acc00,240($key)
 1146     addi    $Tbl1,$Tbl0,2048
 1147     lwz $t0,0($key)
 1148     lis $mask80,0x8080
 1149     lwz $t1,4($key)
 1150     lis $mask1b,0x1b1b
 1151     lwz $t2,8($key)
 1152     ori $mask80,$mask80,0x8080
 1153     lwz $t3,12($key)
 1154     ori $mask1b,$mask1b,0x1b1b
 1155     addi    $key,$key,16
 1156 ___
      # 64-bit build: copy each mask into the upper half of its register as
      # well, because the 64-bit mixing code below handles two state words
      # per register.
 1157 $code.=<<___ if ($SIZE_T==8);
 1158     insrdi  $mask80,$mask80,32,0
 1159     insrdi  $mask1b,$mask1b,32,0
 1160 ___
 1161 $code.=<<___;
 1162     mtctr   $acc00
 1163 .align  4
      # Each iteration: add the current round key, substitute every state
      # byte through the byte table (source words are taken in the order
      # s0,s1,s2,s3 / s3,s0,s1,s2 / s2,s3,s0,s1 / s1,s2,s3,s0, which folds
      # in the row rotation), rebuild the four words and fetch the next
      # round key.  The counter is tested in the middle of the body: on the
      # last iteration bdz leaves before the column mixing.
 1164 Ldec_compact_loop:
 1165     xor $s0,$s0,$t0
 1166     xor $s1,$s1,$t1
 1167     rlwinm  $acc00,$s0,`32-24`,24,31
 1168     xor $s2,$s2,$t2
 1169     rlwinm  $acc01,$s1,`32-24`,24,31
 1170     xor $s3,$s3,$t3
 1171     rlwinm  $acc02,$s2,`32-24`,24,31
 1172     rlwinm  $acc03,$s3,`32-24`,24,31
 1173     rlwinm  $acc04,$s3,`32-16`,24,31
 1174     rlwinm  $acc05,$s0,`32-16`,24,31
 1175     rlwinm  $acc06,$s1,`32-16`,24,31
 1176     rlwinm  $acc07,$s2,`32-16`,24,31
 1177     lbzx    $acc00,$Tbl1,$acc00
 1178     rlwinm  $acc08,$s2,`32-8`,24,31
 1179     lbzx    $acc01,$Tbl1,$acc01
 1180     rlwinm  $acc09,$s3,`32-8`,24,31
 1181     lbzx    $acc02,$Tbl1,$acc02
 1182     rlwinm  $acc10,$s0,`32-8`,24,31
 1183     lbzx    $acc03,$Tbl1,$acc03
 1184     rlwinm  $acc11,$s1,`32-8`,24,31
 1185     lbzx    $acc04,$Tbl1,$acc04
 1186     rlwinm  $acc12,$s1,`0`,24,31
 1187     lbzx    $acc05,$Tbl1,$acc05
 1188     rlwinm  $acc13,$s2,`0`,24,31
 1189     lbzx    $acc06,$Tbl1,$acc06
 1190     rlwinm  $acc14,$s3,`0`,24,31
 1191     lbzx    $acc07,$Tbl1,$acc07
 1192     rlwinm  $acc15,$s0,`0`,24,31
 1193     lbzx    $acc08,$Tbl1,$acc08
 1194     rlwinm  $s0,$acc00,24,0,7
 1195     lbzx    $acc09,$Tbl1,$acc09
 1196     rlwinm  $s1,$acc01,24,0,7
 1197     lbzx    $acc10,$Tbl1,$acc10
 1198     rlwinm  $s2,$acc02,24,0,7
 1199     lbzx    $acc11,$Tbl1,$acc11
 1200     rlwinm  $s3,$acc03,24,0,7
 1201     lbzx    $acc12,$Tbl1,$acc12
 1202     rlwimi  $s0,$acc04,16,8,15
 1203     lbzx    $acc13,$Tbl1,$acc13
 1204     rlwimi  $s1,$acc05,16,8,15
 1205     lbzx    $acc14,$Tbl1,$acc14
 1206     rlwimi  $s2,$acc06,16,8,15
 1207     lbzx    $acc15,$Tbl1,$acc15
 1208     rlwimi  $s3,$acc07,16,8,15
 1209     rlwimi  $s0,$acc08,8,16,23
 1210     rlwimi  $s1,$acc09,8,16,23
 1211     rlwimi  $s2,$acc10,8,16,23
 1212     rlwimi  $s3,$acc11,8,16,23
 1213     lwz $t0,0($key)
 1214     or  $s0,$s0,$acc12
 1215     lwz $t1,4($key)
 1216     or  $s1,$s1,$acc13
 1217     lwz $t2,8($key)
 1218     or  $s2,$s2,$acc14
 1219     lwz $t3,12($key)
 1220     or  $s3,$s3,$acc15
 1221 
 1222     addi    $key,$key,16
 1223     bdz Ldec_compact_done
 1224 ___
      # Column mixing, 64-bit flavour.  s1 is packed into the upper half of
      # s0 and s3 into the upper half of s2, so each packed doubling below
      # covers eight bytes at once.  r2, r4 and r8 in the comments are the
      # state bytes doubled once, twice and three times, with 0x1b folded
      # in whenever the top bit of a byte was set.  The final extrdi group
      # copies the upper halves out into the odd-numbered accumulators, so
      # the shared 32-bit code that follows finds per-word values in
      # acc00..acc11.
 1225 $code.=<<___ if ($SIZE_T==8);
 1226     # vectorized permutation improves decrypt performance by 10%
 1227     insrdi  $s0,$s1,32,0
 1228     insrdi  $s2,$s3,32,0
 1229 
 1230     and $acc00,$s0,$mask80  # r1=r0&0x80808080
 1231     and $acc02,$s2,$mask80
 1232     srdi    $acc04,$acc00,7     # r1>>7
 1233     srdi    $acc06,$acc02,7
 1234     andc    $acc08,$s0,$mask80  # r0&0x7f7f7f7f
 1235     andc    $acc10,$s2,$mask80
 1236     sub $acc00,$acc00,$acc04    # r1-(r1>>7)
 1237     sub $acc02,$acc02,$acc06
 1238     add $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
 1239     add $acc10,$acc10,$acc10
 1240     and $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
 1241     and $acc02,$acc02,$mask1b
 1242     xor $acc00,$acc00,$acc08    # r2
 1243     xor $acc02,$acc02,$acc10
 1244 
 1245     and $acc04,$acc00,$mask80   # r1=r2&0x80808080
 1246     and $acc06,$acc02,$mask80
 1247     srdi    $acc08,$acc04,7     # r1>>7
 1248     srdi    $acc10,$acc06,7
 1249     andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
 1250     andc    $acc14,$acc02,$mask80
 1251     sub $acc04,$acc04,$acc08    # r1-(r1>>7)
 1252     sub $acc06,$acc06,$acc10
 1253     add $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
 1254     add $acc14,$acc14,$acc14
 1255     and $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
 1256     and $acc06,$acc06,$mask1b
 1257     xor $acc04,$acc04,$acc12    # r4
 1258     xor $acc06,$acc06,$acc14
 1259 
 1260     and $acc08,$acc04,$mask80   # r1=r4&0x80808080
 1261     and $acc10,$acc06,$mask80
 1262     srdi    $acc12,$acc08,7     # r1>>7
 1263     srdi    $acc14,$acc10,7
 1264     sub $acc08,$acc08,$acc12    # r1-(r1>>7)
 1265     sub $acc10,$acc10,$acc14
 1266     andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
 1267     andc    $acc14,$acc06,$mask80
 1268     add $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
 1269     add $acc14,$acc14,$acc14
 1270     and $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
 1271     and $acc10,$acc10,$mask1b
 1272     xor $acc08,$acc08,$acc12    # r8
 1273     xor $acc10,$acc10,$acc14
 1274 
 1275     xor $acc00,$acc00,$s0   # r2^r0
 1276     xor $acc02,$acc02,$s2
 1277     xor $acc04,$acc04,$s0   # r4^r0
 1278     xor $acc06,$acc06,$s2
 1279 
 1280     extrdi  $acc01,$acc00,32,0
 1281     extrdi  $acc03,$acc02,32,0
 1282     extrdi  $acc05,$acc04,32,0
 1283     extrdi  $acc07,$acc06,32,0
 1284     extrdi  $acc09,$acc08,32,0
 1285     extrdi  $acc11,$acc10,32,0
 1286 ___
      # Column mixing, 32-bit flavour: the same three packed doublings, done
      # on each of the four state words separately.  It leaves r2^r0 in
      # acc00..acc03, r4^r0 in acc04..acc07 and r8 in acc08..acc11.
 1287 $code.=<<___ if ($SIZE_T==4);
 1288     and $acc00,$s0,$mask80  # r1=r0&0x80808080
 1289     and $acc01,$s1,$mask80
 1290     and $acc02,$s2,$mask80
 1291     and $acc03,$s3,$mask80
 1292     srwi    $acc04,$acc00,7     # r1>>7
 1293     andc    $acc08,$s0,$mask80  # r0&0x7f7f7f7f
 1294     srwi    $acc05,$acc01,7
 1295     andc    $acc09,$s1,$mask80
 1296     srwi    $acc06,$acc02,7
 1297     andc    $acc10,$s2,$mask80
 1298     srwi    $acc07,$acc03,7
 1299     andc    $acc11,$s3,$mask80
 1300     sub $acc00,$acc00,$acc04    # r1-(r1>>7)
 1301     sub $acc01,$acc01,$acc05
 1302     sub $acc02,$acc02,$acc06
 1303     sub $acc03,$acc03,$acc07
 1304     add $acc08,$acc08,$acc08    # (r0&0x7f7f7f7f)<<1
 1305     add $acc09,$acc09,$acc09
 1306     add $acc10,$acc10,$acc10
 1307     add $acc11,$acc11,$acc11
 1308     and $acc00,$acc00,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
 1309     and $acc01,$acc01,$mask1b
 1310     and $acc02,$acc02,$mask1b
 1311     and $acc03,$acc03,$mask1b
 1312     xor $acc00,$acc00,$acc08    # r2
 1313     xor $acc01,$acc01,$acc09
 1314     xor $acc02,$acc02,$acc10
 1315     xor $acc03,$acc03,$acc11
 1316 
 1317     and $acc04,$acc00,$mask80   # r1=r2&0x80808080
 1318     and $acc05,$acc01,$mask80
 1319     and $acc06,$acc02,$mask80
 1320     and $acc07,$acc03,$mask80
 1321     srwi    $acc08,$acc04,7     # r1>>7
 1322     andc    $acc12,$acc00,$mask80   # r2&0x7f7f7f7f
 1323     srwi    $acc09,$acc05,7
 1324     andc    $acc13,$acc01,$mask80
 1325     srwi    $acc10,$acc06,7
 1326     andc    $acc14,$acc02,$mask80
 1327     srwi    $acc11,$acc07,7
 1328     andc    $acc15,$acc03,$mask80
 1329     sub $acc04,$acc04,$acc08    # r1-(r1>>7)
 1330     sub $acc05,$acc05,$acc09
 1331     sub $acc06,$acc06,$acc10
 1332     sub $acc07,$acc07,$acc11
 1333     add $acc12,$acc12,$acc12    # (r2&0x7f7f7f7f)<<1
 1334     add $acc13,$acc13,$acc13
 1335     add $acc14,$acc14,$acc14
 1336     add $acc15,$acc15,$acc15
 1337     and $acc04,$acc04,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
 1338     and $acc05,$acc05,$mask1b
 1339     and $acc06,$acc06,$mask1b
 1340     and $acc07,$acc07,$mask1b
 1341     xor $acc04,$acc04,$acc12    # r4
 1342     xor $acc05,$acc05,$acc13
 1343     xor $acc06,$acc06,$acc14
 1344     xor $acc07,$acc07,$acc15
 1345 
 1346     and $acc08,$acc04,$mask80   # r1=r4&0x80808080
 1347     and $acc09,$acc05,$mask80
 1348     srwi    $acc12,$acc08,7     # r1>>7
 1349     and $acc10,$acc06,$mask80
 1350     srwi    $acc13,$acc09,7
 1351     and $acc11,$acc07,$mask80
 1352     srwi    $acc14,$acc10,7
 1353     sub $acc08,$acc08,$acc12    # r1-(r1>>7)
 1354     srwi    $acc15,$acc11,7
 1355     sub $acc09,$acc09,$acc13
 1356     sub $acc10,$acc10,$acc14
 1357     sub $acc11,$acc11,$acc15
 1358     andc    $acc12,$acc04,$mask80   # r4&0x7f7f7f7f
 1359     andc    $acc13,$acc05,$mask80
 1360     andc    $acc14,$acc06,$mask80
 1361     andc    $acc15,$acc07,$mask80
 1362     add $acc12,$acc12,$acc12    # (r4&0x7f7f7f7f)<<1
 1363     add $acc13,$acc13,$acc13
 1364     add $acc14,$acc14,$acc14
 1365     add $acc15,$acc15,$acc15
 1366     and $acc08,$acc08,$mask1b   # (r1-(r1>>7))&0x1b1b1b1b
 1367     and $acc09,$acc09,$mask1b
 1368     and $acc10,$acc10,$mask1b
 1369     and $acc11,$acc11,$mask1b
 1370     xor $acc08,$acc08,$acc12    # r8
 1371     xor $acc09,$acc09,$acc13
 1372     xor $acc10,$acc10,$acc14
 1373     xor $acc11,$acc11,$acc15
 1374 
 1375     xor $acc00,$acc00,$s0   # r2^r0
 1376     xor $acc01,$acc01,$s1
 1377     xor $acc02,$acc02,$s2
 1378     xor $acc03,$acc03,$s3
 1379     xor $acc04,$acc04,$s0   # r4^r0
 1380     xor $acc05,$acc05,$s1
 1381     xor $acc06,$acc06,$s2
 1382     xor $acc07,$acc07,$s3
 1383 ___
      # Shared tail of the column mixing (both word sizes).  Using the
      # values prepared above, each state word becomes
      #   ROTATE(r0,8) ^ (r2^r0) ^ (r4^r0) ^ r8
      #     ^ ROTATE(r8^r2^r0,24) ^ ROTATE(r8^r4^r0,16) ^ ROTATE(r8,8)
      # as spelled out step by step in the trailing comments; the loop then
      # starts over with the round-key addition.
 1384 $code.=<<___;
 1385     rotrwi  $s0,$s0,8       # = ROTATE(r0,8)
 1386     rotrwi  $s1,$s1,8
 1387     xor $s0,$s0,$acc00      # ^= r2^r0
 1388     rotrwi  $s2,$s2,8
 1389     xor $s1,$s1,$acc01
 1390     rotrwi  $s3,$s3,8
 1391     xor $s2,$s2,$acc02
 1392     xor $s3,$s3,$acc03
 1393     xor $acc00,$acc00,$acc08
 1394     xor $acc01,$acc01,$acc09
 1395     xor $acc02,$acc02,$acc10
 1396     xor $acc03,$acc03,$acc11
 1397     xor $s0,$s0,$acc04      # ^= r4^r0
 1398     rotrwi  $acc00,$acc00,24
 1399     xor $s1,$s1,$acc05
 1400     rotrwi  $acc01,$acc01,24
 1401     xor $s2,$s2,$acc06
 1402     rotrwi  $acc02,$acc02,24
 1403     xor $s3,$s3,$acc07
 1404     rotrwi  $acc03,$acc03,24
 1405     xor $acc04,$acc04,$acc08
 1406     xor $acc05,$acc05,$acc09
 1407     xor $acc06,$acc06,$acc10
 1408     xor $acc07,$acc07,$acc11
 1409     xor $s0,$s0,$acc08      # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
 1410     rotrwi  $acc04,$acc04,16
 1411     xor $s1,$s1,$acc09
 1412     rotrwi  $acc05,$acc05,16
 1413     xor $s2,$s2,$acc10
 1414     rotrwi  $acc06,$acc06,16
 1415     xor $s3,$s3,$acc11
 1416     rotrwi  $acc07,$acc07,16
 1417     xor $s0,$s0,$acc00      # ^= ROTATE(r8^r2^r0,24)
 1418     rotrwi  $acc08,$acc08,8
 1419     xor $s1,$s1,$acc01
 1420     rotrwi  $acc09,$acc09,8
 1421     xor $s2,$s2,$acc02
 1422     rotrwi  $acc10,$acc10,8
 1423     xor $s3,$s3,$acc03
 1424     rotrwi  $acc11,$acc11,8
 1425     xor $s0,$s0,$acc04      # ^= ROTATE(r8^r4^r0,16)
 1426     xor $s1,$s1,$acc05
 1427     xor $s2,$s2,$acc06
 1428     xor $s3,$s3,$acc07
 1429     xor $s0,$s0,$acc08      # ^= ROTATE(r8,8)   
 1430     xor $s1,$s1,$acc09  
 1431     xor $s2,$s2,$acc10  
 1432     xor $s3,$s3,$acc11  
 1433 
 1434     b   Ldec_compact_loop
 1435 .align  4
      # Reached from the bdz above after the last substitution step: add
      # the final round key (already loaded into t0..t3) and return.
 1436 Ldec_compact_done:
 1437     xor $s0,$s0,$t0
 1438     xor $s1,$s1,$t1
 1439     xor $s2,$s2,$t2
 1440     xor $s3,$s3,$t3
 1441     blr
 1442     .long   0
 1443     .byte   0,12,0x14,0,0,0,0,0
      # The size directive is placed here, after the internal helpers, so
      # the span it records for AES_decrypt includes them too.
 1444 .size   .AES_decrypt,.-.AES_decrypt
 1445 
      # Identification string embedded in the generated module.
 1446 .asciz  "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
 1447 .align  7
 1448 ___
 1449 
 1450 $code =~ s/\`([^\`]*)\`/eval $1/gem;
      # The substitution above replaces every backtick-quoted expression in
      # the generated text (frame offsets, shift counts, table offsets) with
      # the result of evaluating it as Perl, so only plain numbers are
      # emitted.
 1451 print $code;
      # Buffered write errors, and a failing consumer when STDOUT is a pipe,
      # are only reported when the handle is closed.  Check the result so a
      # broken run stops with an error instead of silently leaving
      # truncated output behind.
 1452 close STDOUT or die "error closing STDOUT: $!";