"Fossies" - the Fresh Open Source Software Archive

Member "openssl-1.0.2q/crypto/aes/asm/aes-sparcv9.pl" (20 Nov 2018, 30021 Bytes) of package /linux/misc/openssl-1.0.2q.tar.gz:


As a special service, "Fossies" has tried to format the requested source page as HTML, using (guessed) Perl source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "aes-sparcv9.pl", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 1.1.0g_vs_1.1.1-pre2.

    1 #!/usr/bin/env perl
    2 #
    3 # ====================================================================
    4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
    5 # project. Rights for redistribution and usage in source and binary
    6 # forms are granted according to the OpenSSL license.
    7 # ====================================================================
    8 #
    9 # Version 1.1
   10 #
   11 # The major reason for undertaking this effort was to mitigate the hazard of
   12 # cache-timing attacks. This is [currently and initially!] addressed in
   13 # two ways. 1. S-boxes are compressed from 5KB to 2KB+256B size each.
   14 # 2. References to them are scheduled for L2 cache latency, meaning
   15 # that the tables don't have to reside in L1 cache. Once again, this
   16 # is an initial draft and one should expect more countermeasures to
   17 # be implemented...
   18 #
   19 # Version 1.1 prefetches T[ed]4 in order to mitigate attacks on the last
   20 # round.
   21 #
   22 # Even though performance was not the primary goal [on the contrary,
   23 # extra shifts "induced" by compressed S-box and longer loop epilogue
   24 # "induced" by scheduling for L2 have negative effect on performance],
   25 # the code turned out to run in ~23 cycles per processed byte en-/
   26 # decrypted with a 128-bit key. This is a pretty good result for code
   27 # with the mentioned qualities and an UltraSPARC core. Compared to Sun C
   28 # generated code my encrypt procedure runs just a few percent faster,
   29 # while the decrypt one is a whole 50% faster [yes, Sun C failed to generate
   30 # an optimal decrypt procedure]. Compared to GNU C generated code both
   31 # procedures are more than 60% faster:-)
   32 
   33 $bits=32;                                                   # default to the 32-bit ABI unless a flag below says otherwise
   34 for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }     # any argument containing -m64 or -xarch=v9 selects the 64-bit ABI
   35 if ($bits==64)  { $bias=2047; $frame=192; }                 # $bias: offset added to %sp-relative stack addresses; $frame: base size handed to "save"
   36 else        { $bias=0;    $frame=112; }
   37 $locals=16;                                                 # extra bytes reserved beyond $frame by the inner routines; the saved %i7 is stored at offset $frame+0
   38 
   39 $acc0="%l0";    # acc0..acc15: scratch registers; each first holds a masked table offset, then the table entry loaded from it
   40 $acc1="%o0";
   41 $acc2="%o1";
   42 $acc3="%o2";
   43 
   44 $acc4="%l1";
   45 $acc5="%o3";
   46 $acc6="%o4";
   47 $acc7="%o5";
   48 
   49 $acc8="%l2";
   50 $acc9="%o7";
   51 $acc10="%g1";
   52 $acc11="%g2";
   53 
   54 $acc12="%l3";
   55 $acc13="%g3";
   56 $acc14="%g4";
   57 $acc15="%g5";
   58 
   59 $t0="%l4";      # t0..t3: loaded with round-key words, then xor-accumulated with table entries to form the next state
   60 $t1="%l5";
   61 $t2="%l6";
   62 $t3="%l7";
   63 
   64 $s0="%i0";      # s0..s3: the four 32-bit state words; the caller places them in its %o0..%o3 and reads the result back from there
   65 $s1="%i1";
   66 $s2="%i2";
   67 $s3="%i3";
   68 $tbl="%i4";     # base address of the lookup table (AES_encrypt passes the address of AES_Te here)
   69 $key="%i5";     # pointer into the key schedule; the round count is read from offset 240 and the pointer advances by 32 per loop pass
   70 $rounds="%i7";  # aliases with return address, which is off-loaded to stack
   71 
   72 sub _data_word      # Append every argument to the global $code as one ".long w,w" line (the 32-bit word emitted twice, i.e. one 8-byte table entry).
   73 { my $i;            # No "()" prototype: the sub consumes an argument list, and the old empty prototype only worked because callers used the &-form.
   74     while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }    # duplicated word: a 64-bit load shifted right by 8/16/24 leaves a byte-rotated copy in the low 32 bits
   75 }
   76 
   77 $code.=<<___ if ($bits==64);    # 64-bit builds only: emit .register directives marking %g2 and %g3 as scratch
   78 .register   %g2,#scratch
   79 .register   %g3,#scratch
   80 ___
   81 $code.=<<___;                   # open the .text section and place the AES_Te label on a 256-byte boundary
   82 .section    ".text",#alloc,#execinstr
   83 
   84 .align  256
   85 AES_Te:
   86 ___
   87 &_data_word(    # 256 encryption table words; each is emitted twice, giving 8-byte entries (2048 bytes in total)
   88     0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
   89     0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
   90     0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
   91     0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
   92     0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
   93     0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
   94     0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
   95     0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
   96     0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
   97     0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
   98     0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
   99     0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
  100     0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
  101     0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
  102     0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
  103     0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
  104     0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
  105     0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
  106     0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
  107     0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
  108     0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
  109     0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
  110     0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
  111     0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
  112     0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
  113     0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
  114     0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
  115     0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
  116     0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
  117     0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
  118     0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
  119     0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
  120     0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
  121     0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
  122     0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
  123     0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
  124     0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
  125     0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
  126     0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
  127     0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
  128     0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
  129     0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
  130     0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
  131     0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
  132     0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
  133     0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
  134     0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
  135     0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
  136     0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
  137     0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
  138     0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
  139     0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
  140     0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
  141     0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
  142     0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
  143     0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
  144     0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
  145     0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
  146     0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
  147     0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
  148     0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
  149     0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
  150     0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
  151     0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
  152 $code.=<<___;   # emitted verbatim: the 256 one-byte entries that follow the 2048-byte word table, then _sparcv9_AES_encrypt and the AES_encrypt entry point
  153     .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
  154     .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
  155     .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
  156     .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
  157     .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
  158     .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
  159     .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
  160     .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
  161     .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
  162     .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
  163     .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
  164     .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
  165     .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
  166     .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
  167     .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
  168     .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
  169     .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
  170     .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
  171     .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
  172     .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
  173     .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
  174     .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
  175     .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
  176     .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
  177     .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
  178     .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
  179     .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
  180     .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
  181     .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
  182     .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
  183     .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
  184     .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
  185 .type   AES_Te,#object
  186 .size   AES_Te,(.-AES_Te)
  187 
  188 .align  64
  189 .skip   16
  190 _sparcv9_AES_encrypt:
  191     save    %sp,-$frame-$locals,%sp
  192     stx %i7,[%sp+$bias+$frame+0]    ! off-load return address
  193     ld  [$key+240],$rounds
  194     ld  [$key+0],$t0
  195     ld  [$key+4],$t1            !
  196     ld  [$key+8],$t2
  197     srl $rounds,1,$rounds
  198     xor $t0,$s0,$s0
  199     ld  [$key+12],$t3
  200     srl $s0,21,$acc0
  201     xor $t1,$s1,$s1
  202     ld  [$key+16],$t0
  203     srl $s1,13,$acc1            !
  204     xor $t2,$s2,$s2
  205     ld  [$key+20],$t1
  206     xor $t3,$s3,$s3
  207     ld  [$key+24],$t2
  208     and $acc0,2040,$acc0
  209     ld  [$key+28],$t3
  210     nop
  211 .Lenc_loop:
  212     srl $s2,5,$acc2         !
  213     and $acc1,2040,$acc1
  214     ldx [$tbl+$acc0],$acc0
  215     sll $s3,3,$acc3
  216     and $acc2,2040,$acc2
  217     ldx [$tbl+$acc1],$acc1
  218     srl $s1,21,$acc4
  219     and $acc3,2040,$acc3
  220     ldx [$tbl+$acc2],$acc2      !
  221     srl $s2,13,$acc5
  222     and $acc4,2040,$acc4
  223     ldx [$tbl+$acc3],$acc3
  224     srl $s3,5,$acc6
  225     and $acc5,2040,$acc5
  226     ldx [$tbl+$acc4],$acc4
  227     fmovs   %f0,%f0
  228     sll $s0,3,$acc7         !
  229     and $acc6,2040,$acc6
  230     ldx [$tbl+$acc5],$acc5
  231     srl $s2,21,$acc8
  232     and $acc7,2040,$acc7
  233     ldx [$tbl+$acc6],$acc6
  234     srl $s3,13,$acc9
  235     and $acc8,2040,$acc8
  236     ldx [$tbl+$acc7],$acc7      !
  237     srl $s0,5,$acc10
  238     and $acc9,2040,$acc9
  239     ldx [$tbl+$acc8],$acc8
  240     sll $s1,3,$acc11
  241     and $acc10,2040,$acc10
  242     ldx [$tbl+$acc9],$acc9
  243     fmovs   %f0,%f0
  244     srl $s3,21,$acc12           !
  245     and $acc11,2040,$acc11
  246     ldx [$tbl+$acc10],$acc10
  247     srl $s0,13,$acc13
  248     and $acc12,2040,$acc12
  249     ldx [$tbl+$acc11],$acc11
  250     srl $s1,5,$acc14
  251     and $acc13,2040,$acc13
  252     ldx [$tbl+$acc12],$acc12        !
  253     sll $s2,3,$acc15
  254     and $acc14,2040,$acc14
  255     ldx [$tbl+$acc13],$acc13
  256     and $acc15,2040,$acc15
  257     add $key,32,$key
  258     ldx [$tbl+$acc14],$acc14
  259     fmovs   %f0,%f0
  260     subcc   $rounds,1,$rounds       !
  261     ldx [$tbl+$acc15],$acc15
  262     bz,a,pn %icc,.Lenc_last
  263     add $tbl,2048,$rounds
  264 
  265         srlx    $acc1,8,$acc1
  266         xor $acc0,$t0,$t0
  267     ld  [$key+0],$s0
  268     fmovs   %f0,%f0
  269         srlx    $acc2,16,$acc2      !
  270         xor $acc1,$t0,$t0
  271     ld  [$key+4],$s1
  272         srlx    $acc3,24,$acc3
  273         xor $acc2,$t0,$t0
  274     ld  [$key+8],$s2
  275         srlx    $acc5,8,$acc5
  276         xor $acc3,$t0,$t0
  277     ld  [$key+12],$s3           !
  278         srlx    $acc6,16,$acc6
  279         xor $acc4,$t1,$t1
  280     fmovs   %f0,%f0
  281         srlx    $acc7,24,$acc7
  282         xor $acc5,$t1,$t1
  283         srlx    $acc9,8,$acc9
  284         xor $acc6,$t1,$t1
  285         srlx    $acc10,16,$acc10    !
  286         xor $acc7,$t1,$t1
  287         srlx    $acc11,24,$acc11
  288         xor $acc8,$t2,$t2
  289         srlx    $acc13,8,$acc13
  290         xor $acc9,$t2,$t2
  291         srlx    $acc14,16,$acc14
  292         xor $acc10,$t2,$t2
  293         srlx    $acc15,24,$acc15    !
  294         xor $acc11,$t2,$t2
  295         xor $acc12,$acc14,$acc14
  296         xor $acc13,$t3,$t3
  297     srl $t0,21,$acc0
  298         xor $acc14,$t3,$t3
  299     srl $t1,13,$acc1
  300         xor $acc15,$t3,$t3
  301 
  302     and $acc0,2040,$acc0        !
  303     srl $t2,5,$acc2
  304     and $acc1,2040,$acc1
  305     ldx [$tbl+$acc0],$acc0
  306     sll $t3,3,$acc3
  307     and $acc2,2040,$acc2
  308     ldx [$tbl+$acc1],$acc1
  309     fmovs   %f0,%f0
  310     srl $t1,21,$acc4            !
  311     and $acc3,2040,$acc3
  312     ldx [$tbl+$acc2],$acc2
  313     srl $t2,13,$acc5
  314     and $acc4,2040,$acc4
  315     ldx [$tbl+$acc3],$acc3
  316     srl $t3,5,$acc6
  317     and $acc5,2040,$acc5
  318     ldx [$tbl+$acc4],$acc4      !
  319     sll $t0,3,$acc7
  320     and $acc6,2040,$acc6
  321     ldx [$tbl+$acc5],$acc5
  322     srl $t2,21,$acc8
  323     and $acc7,2040,$acc7
  324     ldx [$tbl+$acc6],$acc6
  325     fmovs   %f0,%f0
  326     srl $t3,13,$acc9            !
  327     and $acc8,2040,$acc8
  328     ldx [$tbl+$acc7],$acc7
  329     srl $t0,5,$acc10
  330     and $acc9,2040,$acc9
  331     ldx [$tbl+$acc8],$acc8
  332     sll $t1,3,$acc11
  333     and $acc10,2040,$acc10
  334     ldx [$tbl+$acc9],$acc9      !
  335     srl $t3,21,$acc12
  336     and $acc11,2040,$acc11
  337     ldx [$tbl+$acc10],$acc10
  338     srl $t0,13,$acc13
  339     and $acc12,2040,$acc12
  340     ldx [$tbl+$acc11],$acc11
  341     fmovs   %f0,%f0
  342     srl $t1,5,$acc14            !
  343     and $acc13,2040,$acc13
  344     ldx [$tbl+$acc12],$acc12
  345     sll $t2,3,$acc15
  346     and $acc14,2040,$acc14
  347     ldx [$tbl+$acc13],$acc13
  348         srlx    $acc1,8,$acc1
  349     and $acc15,2040,$acc15
  350     ldx [$tbl+$acc14],$acc14        !
  351 
  352         srlx    $acc2,16,$acc2
  353         xor $acc0,$s0,$s0
  354     ldx [$tbl+$acc15],$acc15
  355         srlx    $acc3,24,$acc3
  356         xor $acc1,$s0,$s0
  357     ld  [$key+16],$t0
  358     fmovs   %f0,%f0
  359         srlx    $acc5,8,$acc5       !
  360         xor $acc2,$s0,$s0
  361     ld  [$key+20],$t1
  362         srlx    $acc6,16,$acc6
  363         xor $acc3,$s0,$s0
  364     ld  [$key+24],$t2
  365         srlx    $acc7,24,$acc7
  366         xor $acc4,$s1,$s1
  367     ld  [$key+28],$t3           !
  368         srlx    $acc9,8,$acc9
  369         xor $acc5,$s1,$s1
  370     ldx [$tbl+2048+0],%g0       ! prefetch te4
  371         srlx    $acc10,16,$acc10
  372         xor $acc6,$s1,$s1
  373     ldx [$tbl+2048+32],%g0      ! prefetch te4
  374         srlx    $acc11,24,$acc11
  375         xor $acc7,$s1,$s1
  376     ldx [$tbl+2048+64],%g0      ! prefetch te4
  377         srlx    $acc13,8,$acc13
  378         xor $acc8,$s2,$s2
  379     ldx [$tbl+2048+96],%g0      ! prefetch te4
  380         srlx    $acc14,16,$acc14    !
  381         xor $acc9,$s2,$s2
  382     ldx [$tbl+2048+128],%g0     ! prefetch te4
  383         srlx    $acc15,24,$acc15
  384         xor $acc10,$s2,$s2
  385     ldx [$tbl+2048+160],%g0     ! prefetch te4
  386     srl $s0,21,$acc0
  387         xor $acc11,$s2,$s2
  388     ldx [$tbl+2048+192],%g0     ! prefetch te4
  389         xor $acc12,$acc14,$acc14
  390         xor $acc13,$s3,$s3
  391     ldx [$tbl+2048+224],%g0     ! prefetch te4
  392     srl $s1,13,$acc1            !
  393         xor $acc14,$s3,$s3
  394         xor $acc15,$s3,$s3
  395     ba  .Lenc_loop
  396     and $acc0,2040,$acc0
  397 
  398 .align  32
  399 .Lenc_last:
  400         srlx    $acc1,8,$acc1       !
  401         xor $acc0,$t0,$t0
  402     ld  [$key+0],$s0
  403         srlx    $acc2,16,$acc2
  404         xor $acc1,$t0,$t0
  405     ld  [$key+4],$s1
  406         srlx    $acc3,24,$acc3
  407         xor $acc2,$t0,$t0
  408     ld  [$key+8],$s2            !
  409         srlx    $acc5,8,$acc5
  410         xor $acc3,$t0,$t0
  411     ld  [$key+12],$s3
  412         srlx    $acc6,16,$acc6
  413         xor $acc4,$t1,$t1
  414         srlx    $acc7,24,$acc7
  415         xor $acc5,$t1,$t1
  416         srlx    $acc9,8,$acc9       !
  417         xor $acc6,$t1,$t1
  418         srlx    $acc10,16,$acc10
  419         xor $acc7,$t1,$t1
  420         srlx    $acc11,24,$acc11
  421         xor $acc8,$t2,$t2
  422         srlx    $acc13,8,$acc13
  423         xor $acc9,$t2,$t2
  424         srlx    $acc14,16,$acc14    !
  425         xor $acc10,$t2,$t2
  426         srlx    $acc15,24,$acc15
  427         xor $acc11,$t2,$t2
  428         xor $acc12,$acc14,$acc14
  429         xor $acc13,$t3,$t3
  430     srl $t0,24,$acc0
  431         xor $acc14,$t3,$t3
  432     srl $t1,16,$acc1            !
  433         xor $acc15,$t3,$t3
  434 
  435     srl $t2,8,$acc2
  436     and $acc1,255,$acc1
  437     ldub    [$rounds+$acc0],$acc0
  438     srl $t1,24,$acc4
  439     and $acc2,255,$acc2
  440     ldub    [$rounds+$acc1],$acc1
  441     srl $t2,16,$acc5            !
  442     and $t3,255,$acc3
  443     ldub    [$rounds+$acc2],$acc2
  444     ldub    [$rounds+$acc3],$acc3
  445     srl $t3,8,$acc6
  446     and $acc5,255,$acc5
  447     ldub    [$rounds+$acc4],$acc4
  448     fmovs   %f0,%f0
  449     srl $t2,24,$acc8            !
  450     and $acc6,255,$acc6
  451     ldub    [$rounds+$acc5],$acc5
  452     srl $t3,16,$acc9
  453     and $t0,255,$acc7
  454     ldub    [$rounds+$acc6],$acc6
  455     ldub    [$rounds+$acc7],$acc7
  456     fmovs   %f0,%f0
  457     srl $t0,8,$acc10            !
  458     and $acc9,255,$acc9
  459     ldub    [$rounds+$acc8],$acc8
  460     srl $t3,24,$acc12
  461     and $acc10,255,$acc10
  462     ldub    [$rounds+$acc9],$acc9
  463     srl $t0,16,$acc13
  464     and $t1,255,$acc11
  465     ldub    [$rounds+$acc10],$acc10     !
  466     srl $t1,8,$acc14
  467     and $acc13,255,$acc13
  468     ldub    [$rounds+$acc11],$acc11
  469     ldub    [$rounds+$acc12],$acc12
  470     and $acc14,255,$acc14
  471     ldub    [$rounds+$acc13],$acc13
  472     and $t2,255,$acc15
  473     ldub    [$rounds+$acc14],$acc14     !
  474 
  475         sll $acc0,24,$acc0
  476         xor $acc3,$s0,$s0
  477     ldub    [$rounds+$acc15],$acc15
  478         sll $acc1,16,$acc1
  479         xor $acc0,$s0,$s0
  480     ldx [%sp+$bias+$frame+0],%i7    ! restore return address
  481     fmovs   %f0,%f0
  482         sll $acc2,8,$acc2       !
  483         xor $acc1,$s0,$s0
  484         sll $acc4,24,$acc4
  485         xor $acc2,$s0,$s0
  486         sll $acc5,16,$acc5
  487         xor $acc7,$s1,$s1
  488         sll $acc6,8,$acc6
  489         xor $acc4,$s1,$s1
  490         sll $acc8,24,$acc8      !
  491         xor $acc5,$s1,$s1
  492         sll $acc9,16,$acc9
  493         xor $acc11,$s2,$s2
  494         sll $acc10,8,$acc10
  495         xor $acc6,$s1,$s1
  496         sll $acc12,24,$acc12
  497         xor $acc8,$s2,$s2
  498         sll $acc13,16,$acc13    !
  499         xor $acc9,$s2,$s2
  500         sll $acc14,8,$acc14
  501         xor $acc10,$s2,$s2
  502         xor $acc12,$acc14,$acc14
  503         xor $acc13,$s3,$s3
  504         xor $acc14,$s3,$s3
  505         xor $acc15,$s3,$s3
  506 
  507     ret
  508     restore
  509 .type   _sparcv9_AES_encrypt,#function
  510 .size   _sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt)
  511 
  512 .align  32
  513 .globl  AES_encrypt
  514 AES_encrypt:
  515     or  %o0,%o1,%g1
  516     andcc   %g1,3,%g0
  517     bnz,pn  %xcc,.Lunaligned_enc
  518     save    %sp,-$frame,%sp
  519 
  520     ld  [%i0+0],%o0
  521     ld  [%i0+4],%o1
  522     ld  [%i0+8],%o2
  523     ld  [%i0+12],%o3
  524 
  525 1:  call    .+8
  526     add %o7,AES_Te-1b,%o4
  527     call    _sparcv9_AES_encrypt
  528     mov %i2,%o5
  529 
  530     st  %o0,[%i1+0]
  531     st  %o1,[%i1+4]
  532     st  %o2,[%i1+8]
  533     st  %o3,[%i1+12]
  534 
  535     ret
  536     restore
  537 
  538 .align  32
  539 .Lunaligned_enc:
  540     ldub    [%i0+0],%l0
  541     ldub    [%i0+1],%l1
  542     ldub    [%i0+2],%l2
  543 
  544     sll %l0,24,%l0
  545     ldub    [%i0+3],%l3
  546     sll %l1,16,%l1
  547     ldub    [%i0+4],%l4
  548     sll %l2,8,%l2
  549     or  %l1,%l0,%l0
  550     ldub    [%i0+5],%l5
  551     sll %l4,24,%l4
  552     or  %l3,%l2,%l2
  553     ldub    [%i0+6],%l6
  554     sll %l5,16,%l5
  555     or  %l0,%l2,%o0
  556     ldub    [%i0+7],%l7
  557 
  558     sll %l6,8,%l6
  559     or  %l5,%l4,%l4
  560     ldub    [%i0+8],%l0
  561     or  %l7,%l6,%l6
  562     ldub    [%i0+9],%l1
  563     or  %l4,%l6,%o1
  564     ldub    [%i0+10],%l2
  565 
  566     sll %l0,24,%l0
  567     ldub    [%i0+11],%l3
  568     sll %l1,16,%l1
  569     ldub    [%i0+12],%l4
  570     sll %l2,8,%l2
  571     or  %l1,%l0,%l0
  572     ldub    [%i0+13],%l5
  573     sll %l4,24,%l4
  574     or  %l3,%l2,%l2
  575     ldub    [%i0+14],%l6
  576     sll %l5,16,%l5
  577     or  %l0,%l2,%o2
  578     ldub    [%i0+15],%l7
  579 
  580     sll %l6,8,%l6
  581     or  %l5,%l4,%l4
  582     or  %l7,%l6,%l6
  583     or  %l4,%l6,%o3
  584 
  585 1:  call    .+8
  586     add %o7,AES_Te-1b,%o4
  587     call    _sparcv9_AES_encrypt
  588     mov %i2,%o5
  589 
  590     srl %o0,24,%l0
  591     srl %o0,16,%l1
  592     stb %l0,[%i1+0]
  593     srl %o0,8,%l2
  594     stb %l1,[%i1+1]
  595     stb %l2,[%i1+2]
  596     srl %o1,24,%l4
  597     stb %o0,[%i1+3]
  598 
  599     srl %o1,16,%l5
  600     stb %l4,[%i1+4]
  601     srl %o1,8,%l6
  602     stb %l5,[%i1+5]
  603     stb %l6,[%i1+6]
  604     srl %o2,24,%l0
  605     stb %o1,[%i1+7]
  606 
  607     srl %o2,16,%l1
  608     stb %l0,[%i1+8]
  609     srl %o2,8,%l2
  610     stb %l1,[%i1+9]
  611     stb %l2,[%i1+10]
  612     srl %o3,24,%l4
  613     stb %o2,[%i1+11]
  614 
  615     srl %o3,16,%l5
  616     stb %l4,[%i1+12]
  617     srl %o3,8,%l6
  618     stb %l5,[%i1+13]
  619     stb %l6,[%i1+14]
  620     stb %o3,[%i1+15]
  621 
  622     ret
  623     restore
  624 .type   AES_encrypt,#function
  625 .size   AES_encrypt,(.-AES_encrypt)
  626 
  627 ___
  628 
  629 $code.=<<___;   # place the AES_Td label on a 256-byte boundary
  630 .align  256
  631 AES_Td:
  632 ___
  633 &_data_word(    # 256 decryption table words; each is emitted twice, giving 8-byte entries (2048 bytes in total)
  634     0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
  635     0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
  636     0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
  637     0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
  638     0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
  639     0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
  640     0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
  641     0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
  642     0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
  643     0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
  644     0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
  645     0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
  646     0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
  647     0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
  648     0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
  649     0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
  650     0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
  651     0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
  652     0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
  653     0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
  654     0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
  655     0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
  656     0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
  657     0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
  658     0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
  659     0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
  660     0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
  661     0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
  662     0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
  663     0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
  664     0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
  665     0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
  666     0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
  667     0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
  668     0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
  669     0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
  670     0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
  671     0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
  672     0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
  673     0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
  674     0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
  675     0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
  676     0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
  677     0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
  678     0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
  679     0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
  680     0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
  681     0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
  682     0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
  683     0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
  684     0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
  685     0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
  686     0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
  687     0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
  688     0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
  689     0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
  690     0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
  691     0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
  692     0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
  693     0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
  694     0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
  695     0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
  696     0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
  697     0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
  698 $code.=<<___;
  699     .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
  700     .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
  701     .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
  702     .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
  703     .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
  704     .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
  705     .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
  706     .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
  707     .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
  708     .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
  709     .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
  710     .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
  711     .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
  712     .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
  713     .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
  714     .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
  715     .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
  716     .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
  717     .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
  718     .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
  719     .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
  720     .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
  721     .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
  722     .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
  723     .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
  724     .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
  725     .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
  726     .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
  727     .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
  728     .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
  729     .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
  730     .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
  731 .type   AES_Td,#object
  732 .size   AES_Td,(.-AES_Td)
  733 
  734 .align  64
  735 .skip   16
  736 _sparcv9_AES_decrypt:
  737     save    %sp,-$frame-$locals,%sp
  738     stx %i7,[%sp+$bias+$frame+0]    ! off-load return address
  739     ld  [$key+240],$rounds
  740     ld  [$key+0],$t0
  741     ld  [$key+4],$t1            !
  742     ld  [$key+8],$t2
  743     ld  [$key+12],$t3
  744     srl $rounds,1,$rounds
  745     xor $t0,$s0,$s0
  746     ld  [$key+16],$t0
  747     xor $t1,$s1,$s1
  748     ld  [$key+20],$t1
  749     srl $s0,21,$acc0            !
  750     xor $t2,$s2,$s2
  751     ld  [$key+24],$t2
  752     xor $t3,$s3,$s3
  753     and $acc0,2040,$acc0
  754     ld  [$key+28],$t3
  755     srl $s3,13,$acc1
  756     nop
  757 .Ldec_loop:
  758     srl $s2,5,$acc2         !
  759     and $acc1,2040,$acc1
  760     ldx [$tbl+$acc0],$acc0
  761     sll $s1,3,$acc3
  762     and $acc2,2040,$acc2
  763     ldx [$tbl+$acc1],$acc1
  764     srl $s1,21,$acc4
  765     and $acc3,2040,$acc3
  766     ldx [$tbl+$acc2],$acc2      !
  767     srl $s0,13,$acc5
  768     and $acc4,2040,$acc4
  769     ldx [$tbl+$acc3],$acc3
  770     srl $s3,5,$acc6
  771     and $acc5,2040,$acc5
  772     ldx [$tbl+$acc4],$acc4
  773     fmovs   %f0,%f0
  774     sll $s2,3,$acc7         !
  775     and $acc6,2040,$acc6
  776     ldx [$tbl+$acc5],$acc5
  777     srl $s2,21,$acc8
  778     and $acc7,2040,$acc7
  779     ldx [$tbl+$acc6],$acc6
  780     srl $s1,13,$acc9
  781     and $acc8,2040,$acc8
  782     ldx [$tbl+$acc7],$acc7      !
  783     srl $s0,5,$acc10
  784     and $acc9,2040,$acc9
  785     ldx [$tbl+$acc8],$acc8
  786     sll $s3,3,$acc11
  787     and $acc10,2040,$acc10
  788     ldx [$tbl+$acc9],$acc9
  789     fmovs   %f0,%f0
  790     srl $s3,21,$acc12           !
  791     and $acc11,2040,$acc11
  792     ldx [$tbl+$acc10],$acc10
  793     srl $s2,13,$acc13
  794     and $acc12,2040,$acc12
  795     ldx [$tbl+$acc11],$acc11
  796     srl $s1,5,$acc14
  797     and $acc13,2040,$acc13
  798     ldx [$tbl+$acc12],$acc12        !
  799     sll $s0,3,$acc15
  800     and $acc14,2040,$acc14
  801     ldx [$tbl+$acc13],$acc13
  802     and $acc15,2040,$acc15
  803     add $key,32,$key
  804     ldx [$tbl+$acc14],$acc14
  805     fmovs   %f0,%f0
  806     subcc   $rounds,1,$rounds       !
  807     ldx [$tbl+$acc15],$acc15
  808     bz,a,pn %icc,.Ldec_last
  809     add $tbl,2048,$rounds
  810 
  811         srlx    $acc1,8,$acc1
  812         xor $acc0,$t0,$t0
  813     ld  [$key+0],$s0
  814     fmovs   %f0,%f0
  815         srlx    $acc2,16,$acc2      !
  816         xor $acc1,$t0,$t0
  817     ld  [$key+4],$s1
  818         srlx    $acc3,24,$acc3
  819         xor $acc2,$t0,$t0
  820     ld  [$key+8],$s2
  821         srlx    $acc5,8,$acc5
  822         xor $acc3,$t0,$t0
  823     ld  [$key+12],$s3           !
  824         srlx    $acc6,16,$acc6
  825         xor $acc4,$t1,$t1
  826     fmovs   %f0,%f0
  827         srlx    $acc7,24,$acc7
  828         xor $acc5,$t1,$t1
  829         srlx    $acc9,8,$acc9
  830         xor $acc6,$t1,$t1
  831         srlx    $acc10,16,$acc10    !
  832         xor $acc7,$t1,$t1
  833         srlx    $acc11,24,$acc11
  834         xor $acc8,$t2,$t2
  835         srlx    $acc13,8,$acc13
  836         xor $acc9,$t2,$t2
  837         srlx    $acc14,16,$acc14
  838         xor $acc10,$t2,$t2
  839         srlx    $acc15,24,$acc15    !
  840         xor $acc11,$t2,$t2
  841         xor $acc12,$acc14,$acc14
  842         xor $acc13,$t3,$t3
  843     srl $t0,21,$acc0
  844         xor $acc14,$t3,$t3
  845         xor $acc15,$t3,$t3
  846     srl $t3,13,$acc1
  847 
  848     and $acc0,2040,$acc0        !
  849     srl $t2,5,$acc2
  850     and $acc1,2040,$acc1
  851     ldx [$tbl+$acc0],$acc0
  852     sll $t1,3,$acc3
  853     and $acc2,2040,$acc2
  854     ldx [$tbl+$acc1],$acc1
  855     fmovs   %f0,%f0
  856     srl $t1,21,$acc4            !
  857     and $acc3,2040,$acc3
  858     ldx [$tbl+$acc2],$acc2
  859     srl $t0,13,$acc5
  860     and $acc4,2040,$acc4
  861     ldx [$tbl+$acc3],$acc3
  862     srl $t3,5,$acc6
  863     and $acc5,2040,$acc5
  864     ldx [$tbl+$acc4],$acc4      !
  865     sll $t2,3,$acc7
  866     and $acc6,2040,$acc6
  867     ldx [$tbl+$acc5],$acc5
  868     srl $t2,21,$acc8
  869     and $acc7,2040,$acc7
  870     ldx [$tbl+$acc6],$acc6
  871     fmovs   %f0,%f0
  872     srl $t1,13,$acc9            !
  873     and $acc8,2040,$acc8
  874     ldx [$tbl+$acc7],$acc7
  875     srl $t0,5,$acc10
  876     and $acc9,2040,$acc9
  877     ldx [$tbl+$acc8],$acc8
  878     sll $t3,3,$acc11
  879     and $acc10,2040,$acc10
  880     ldx [$tbl+$acc9],$acc9      !
  881     srl $t3,21,$acc12
  882     and $acc11,2040,$acc11
  883     ldx [$tbl+$acc10],$acc10
  884     srl $t2,13,$acc13
  885     and $acc12,2040,$acc12
  886     ldx [$tbl+$acc11],$acc11
  887     fmovs   %f0,%f0
  888     srl $t1,5,$acc14            !
  889     and $acc13,2040,$acc13
  890     ldx [$tbl+$acc12],$acc12
  891     sll $t0,3,$acc15
  892     and $acc14,2040,$acc14
  893     ldx [$tbl+$acc13],$acc13
  894         srlx    $acc1,8,$acc1
  895     and $acc15,2040,$acc15
  896     ldx [$tbl+$acc14],$acc14        !
  897 
  898         srlx    $acc2,16,$acc2
  899         xor $acc0,$s0,$s0
  900     ldx [$tbl+$acc15],$acc15
  901         srlx    $acc3,24,$acc3
  902         xor $acc1,$s0,$s0
  903     ld  [$key+16],$t0
  904     fmovs   %f0,%f0
  905         srlx    $acc5,8,$acc5       !
  906         xor $acc2,$s0,$s0
  907     ld  [$key+20],$t1
  908         srlx    $acc6,16,$acc6
  909         xor $acc3,$s0,$s0
  910     ld  [$key+24],$t2
  911         srlx    $acc7,24,$acc7
  912         xor $acc4,$s1,$s1
  913     ld  [$key+28],$t3           !
  914         srlx    $acc9,8,$acc9
  915         xor $acc5,$s1,$s1
  916     ldx [$tbl+2048+0],%g0       ! prefetch td4
  917         srlx    $acc10,16,$acc10
  918         xor $acc6,$s1,$s1
  919     ldx [$tbl+2048+32],%g0      ! prefetch td4
  920         srlx    $acc11,24,$acc11
  921         xor $acc7,$s1,$s1
  922     ldx [$tbl+2048+64],%g0      ! prefetch td4
  923         srlx    $acc13,8,$acc13
  924         xor $acc8,$s2,$s2
  925     ldx [$tbl+2048+96],%g0      ! prefetch td4
  926         srlx    $acc14,16,$acc14    !
  927         xor $acc9,$s2,$s2
  928     ldx [$tbl+2048+128],%g0     ! prefetch td4
  929         srlx    $acc15,24,$acc15
  930         xor $acc10,$s2,$s2
  931     ldx [$tbl+2048+160],%g0     ! prefetch td4
  932     srl $s0,21,$acc0
  933         xor $acc11,$s2,$s2
  934     ldx [$tbl+2048+192],%g0     ! prefetch td4
  935         xor $acc12,$acc14,$acc14
  936         xor $acc13,$s3,$s3
  937     ldx [$tbl+2048+224],%g0     ! prefetch td4
  938     and $acc0,2040,$acc0        !
  939         xor $acc14,$s3,$s3
  940         xor $acc15,$s3,$s3
  941     ba  .Ldec_loop
  942     srl $s3,13,$acc1
  943 
  944 .align  32
  945 .Ldec_last:
  946         srlx    $acc1,8,$acc1       !
  947         xor $acc0,$t0,$t0
  948     ld  [$key+0],$s0
  949         srlx    $acc2,16,$acc2
  950         xor $acc1,$t0,$t0
  951     ld  [$key+4],$s1
  952         srlx    $acc3,24,$acc3
  953         xor $acc2,$t0,$t0
  954     ld  [$key+8],$s2            !
  955         srlx    $acc5,8,$acc5
  956         xor $acc3,$t0,$t0
  957     ld  [$key+12],$s3
  958         srlx    $acc6,16,$acc6
  959         xor $acc4,$t1,$t1
  960         srlx    $acc7,24,$acc7
  961         xor $acc5,$t1,$t1
  962         srlx    $acc9,8,$acc9       !
  963         xor $acc6,$t1,$t1
  964         srlx    $acc10,16,$acc10
  965         xor $acc7,$t1,$t1
  966         srlx    $acc11,24,$acc11
  967         xor $acc8,$t2,$t2
  968         srlx    $acc13,8,$acc13
  969         xor $acc9,$t2,$t2
  970         srlx    $acc14,16,$acc14    !
  971         xor $acc10,$t2,$t2
  972         srlx    $acc15,24,$acc15
  973         xor $acc11,$t2,$t2
  974         xor $acc12,$acc14,$acc14
  975         xor $acc13,$t3,$t3
  976     srl $t0,24,$acc0
  977         xor $acc14,$t3,$t3
  978         xor $acc15,$t3,$t3      !
  979     srl $t3,16,$acc1
  980 
  981     srl $t2,8,$acc2
  982     and $acc1,255,$acc1
  983     ldub    [$rounds+$acc0],$acc0
  984     srl $t1,24,$acc4
  985     and $acc2,255,$acc2
  986     ldub    [$rounds+$acc1],$acc1
  987     srl $t0,16,$acc5            !
  988     and $t1,255,$acc3
  989     ldub    [$rounds+$acc2],$acc2
  990     ldub    [$rounds+$acc3],$acc3
  991     srl $t3,8,$acc6
  992     and $acc5,255,$acc5
  993     ldub    [$rounds+$acc4],$acc4
  994     fmovs   %f0,%f0
  995     srl $t2,24,$acc8            !
  996     and $acc6,255,$acc6
  997     ldub    [$rounds+$acc5],$acc5
  998     srl $t1,16,$acc9
  999     and $t2,255,$acc7
 1000     ldub    [$rounds+$acc6],$acc6
 1001     ldub    [$rounds+$acc7],$acc7
 1002     fmovs   %f0,%f0
 1003     srl $t0,8,$acc10            !
 1004     and $acc9,255,$acc9
 1005     ldub    [$rounds+$acc8],$acc8
 1006     srl $t3,24,$acc12
 1007     and $acc10,255,$acc10
 1008     ldub    [$rounds+$acc9],$acc9
 1009     srl $t2,16,$acc13
 1010     and $t3,255,$acc11
 1011     ldub    [$rounds+$acc10],$acc10     !
 1012     srl $t1,8,$acc14
 1013     and $acc13,255,$acc13
 1014     ldub    [$rounds+$acc11],$acc11
 1015     ldub    [$rounds+$acc12],$acc12
 1016     and $acc14,255,$acc14
 1017     ldub    [$rounds+$acc13],$acc13
 1018     and $t0,255,$acc15
 1019     ldub    [$rounds+$acc14],$acc14     !
 1020 
 1021         sll $acc0,24,$acc0
 1022         xor $acc3,$s0,$s0
 1023     ldub    [$rounds+$acc15],$acc15
 1024         sll $acc1,16,$acc1
 1025         xor $acc0,$s0,$s0
 1026     ldx [%sp+$bias+$frame+0],%i7    ! restore return address
 1027     fmovs   %f0,%f0
 1028         sll $acc2,8,$acc2       !
 1029         xor $acc1,$s0,$s0
 1030         sll $acc4,24,$acc4
 1031         xor $acc2,$s0,$s0
 1032         sll $acc5,16,$acc5
 1033         xor $acc7,$s1,$s1
 1034         sll $acc6,8,$acc6
 1035         xor $acc4,$s1,$s1
 1036         sll $acc8,24,$acc8      !
 1037         xor $acc5,$s1,$s1
 1038         sll $acc9,16,$acc9
 1039         xor $acc11,$s2,$s2
 1040         sll $acc10,8,$acc10
 1041         xor $acc6,$s1,$s1
 1042         sll $acc12,24,$acc12
 1043         xor $acc8,$s2,$s2
 1044         sll $acc13,16,$acc13    !
 1045         xor $acc9,$s2,$s2
 1046         sll $acc14,8,$acc14
 1047         xor $acc10,$s2,$s2
 1048         xor $acc12,$acc14,$acc14
 1049         xor $acc13,$s3,$s3
 1050         xor $acc14,$s3,$s3
 1051         xor $acc15,$s3,$s3
 1052 
 1053     ret
 1054     restore
 1055 .type   _sparcv9_AES_decrypt,#function
 1056 .size   _sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt)
 1057 
 1058 .align  32
      ! AES_decrypt -- exported wrapper that runs _sparcv9_AES_decrypt on one
      ! 16-byte block.
      !
      ! On entry (before the save below):
      !   %o0  address the 16 input bytes are read from
      !   %o1  address the 16 result bytes are written to
      !   %o2  value forwarded untouched to the core routine in %o5
      ! NOTE(review): %o2 is presumably the key schedule pointer; it is never
      ! dereferenced in this block, so confirm against the C prototype.
      !
      ! Two paths do the same job.  The fast path uses 32-bit ld/st and is
      ! taken only when both pointers are 4-byte aligned; .Lunaligned_dec
      ! moves the block one byte at a time instead.
      !
      ! NOTE(review): the core routine appears to take four state words in
      ! %o0-%o3, the AES_Td address in %o4 and the forwarded argument in %o5,
      ! and to leave four result words in %o0-%o3.  Its register assignments
      ! are made outside this section; confirm there.
 1059 .globl  AES_decrypt
 1060 AES_decrypt:
 1061     or  %o0,%o1,%g1                 ! merge both pointers so a single test covers them
 1062     andcc   %g1,3,%g0               ! condition codes from the low two bits; result discarded
 1063     bnz,pn  %xcc,.Lunaligned_dec    ! any low bit set: take the byte-wise path
 1064     save    %sp,-$frame,%sp         ! delay slot, runs on both paths: new register window and frame
 1065 
      ! Aligned path.  After the save the arguments are visible as %i0-%i2.
      ! Fetch the block as four 32-bit words.
 1066     ld  [%i0+0],%o0
 1067     ld  [%i0+4],%o1
 1068     ld  [%i0+8],%o2
 1069     ld  [%i0+12],%o3
 1070 
      ! call .+8 records the address of label 1 in %o7 and resumes right
      ! after its own delay slot.  Adding the assembly-time difference
      ! AES_Td-1b in that slot gives the run-time address of AES_Td relative
      ! to the current PC.
 1071 1:  call    .+8
 1072     add %o7,AES_Td-1b,%o4       ! delay slot: %o4 = address of AES_Td
 1073     call    _sparcv9_AES_decrypt
 1074     mov %i2,%o5                 ! delay slot: forward the third argument in %o5
 1075 
      ! Write the four words found in %o0-%o3 after the call to the destination.
 1076     st  %o0,[%i1+0]
 1077     st  %o1,[%i1+4]
 1078     st  %o2,[%i1+8]
 1079     st  %o3,[%i1+12]
 1080 
 1081     ret
 1082     restore                     ! delay slot: pop the register window opened by the save
 1083 
 1084 .align  32
      ! Unaligned path: read the 16 bytes individually and assemble four words
      ! with the lowest-addressed byte most significant (big-endian order).
      ! Loads for upcoming bytes are interleaved with the shift/or work on
      ! bytes already fetched.
 1085 .Lunaligned_dec:
 1086     ldub    [%i0+0],%l0
 1087     ldub    [%i0+1],%l1
 1088     ldub    [%i0+2],%l2
 1089 
 1090     sll %l0,24,%l0              ! byte 0 -> bits 31..24
 1091     ldub    [%i0+3],%l3
 1092     sll %l1,16,%l1              ! byte 1 -> bits 23..16
 1093     ldub    [%i0+4],%l4
 1094     sll %l2,8,%l2               ! byte 2 -> bits 15..8
 1095     or  %l1,%l0,%l0
 1096     ldub    [%i0+5],%l5
 1097     sll %l4,24,%l4
 1098     or  %l3,%l2,%l2
 1099     ldub    [%i0+6],%l6
 1100     sll %l5,16,%l5
 1101     or  %l0,%l2,%o0             ! word 0 = bytes 0..3
 1102     ldub    [%i0+7],%l7
 1103 
 1104     sll %l6,8,%l6
 1105     or  %l5,%l4,%l4
 1106     ldub    [%i0+8],%l0
 1107     or  %l7,%l6,%l6
 1108     ldub    [%i0+9],%l1
 1109     or  %l4,%l6,%o1             ! word 1 = bytes 4..7
 1110     ldub    [%i0+10],%l2
 1111 
 1112     sll %l0,24,%l0
 1113     ldub    [%i0+11],%l3
 1114     sll %l1,16,%l1
 1115     ldub    [%i0+12],%l4
 1116     sll %l2,8,%l2
 1117     or  %l1,%l0,%l0
 1118     ldub    [%i0+13],%l5
 1119     sll %l4,24,%l4
 1120     or  %l3,%l2,%l2
 1121     ldub    [%i0+14],%l6
 1122     sll %l5,16,%l5
 1123     or  %l0,%l2,%o2             ! word 2 = bytes 8..11
 1124     ldub    [%i0+15],%l7
 1125 
 1126     sll %l6,8,%l6
 1127     or  %l5,%l4,%l4
 1128     or  %l7,%l6,%l6
 1129     or  %l4,%l6,%o3             ! word 3 = bytes 12..15
 1130 
      ! Same address computation and call sequence as on the aligned path.
 1131 1:  call    .+8
 1132     add %o7,AES_Td-1b,%o4       ! delay slot: %o4 = address of AES_Td
 1133     call    _sparcv9_AES_decrypt
 1134     mov %i2,%o5                 ! delay slot: forward the third argument in %o5
 1135 
      ! Scatter the four words in %o0-%o3 to the destination one byte at a
      ! time, most significant byte first.  stb writes only the low byte of
      ! its source register, so the last byte of each word needs no shift.
 1136     srl %o0,24,%l0
 1137     srl %o0,16,%l1
 1138     stb %l0,[%i1+0]
 1139     srl %o0,8,%l2
 1140     stb %l1,[%i1+1]
 1141     stb %l2,[%i1+2]
 1142     srl %o1,24,%l4
 1143     stb %o0,[%i1+3]
 1144 
 1145     srl %o1,16,%l5
 1146     stb %l4,[%i1+4]
 1147     srl %o1,8,%l6
 1148     stb %l5,[%i1+5]
 1149     stb %l6,[%i1+6]
 1150     srl %o2,24,%l0
 1151     stb %o1,[%i1+7]
 1152 
 1153     srl %o2,16,%l1
 1154     stb %l0,[%i1+8]
 1155     srl %o2,8,%l2
 1156     stb %l1,[%i1+9]
 1157     stb %l2,[%i1+10]
 1158     srl %o3,24,%l4
 1159     stb %o2,[%i1+11]
 1160 
 1161     srl %o3,16,%l5
 1162     stb %l4,[%i1+12]
 1163     srl %o3,8,%l6
 1164     stb %l5,[%i1+13]
 1165     stb %l6,[%i1+14]
 1166     stb %o3,[%i1+15]
 1167 
 1168     ret
 1169     restore                     ! delay slot: pop the register window opened by the save
      ! Symbol type and size cover both the aligned and the unaligned path.
 1170 .type   AES_decrypt,#function
 1171 .size   AES_decrypt,(.-AES_decrypt)
 1172 ___
 1173 
 # fmovs instructions substituting for FP nops were originally added
 # to meet specific instruction alignment requirements to maximize ILP.
 # As UltraSPARC T1, a.k.a. Niagara, has a shared FPU, FP nops can have
 # an undesired effect, so just omit them and sacrifice a fraction of a
 # percent in performance...
 #
 # The substitution deletes everything from "fmovs" to the end of that
 # line (/m lets $ match at every line end, /g applies it throughout),
 # leaving an empty line where each FP nop used to be.
 $code =~ s/fmovs.*$//gm;

 # Emit the generated assembly.  Both the write and the close are checked:
 # output is buffered, so a failure such as a full disk may only be reported
 # when the handle is closed, and ignoring it would leave truncated assembly
 # behind while the script still exits with status 0.
 print($code) or die "error writing generated code: $!";
 close STDOUT or die "error closing STDOUT: $!";    # close also flushes the buffer