"Fossies" - the Fresh Open Source Software Archive

Member "src/Crypto/Serpent.c" (10 Oct 2018, 19550 Bytes) of package /windows/misc/VeraCrypt_1.23-Hotfix-2_Source.zip:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "Serpent.c", see the Fossies "Dox" file reference documentation.

    1 // serpent.cpp - written and placed in the public domain by Wei Dai
    2 
    3 /* Adapted for TrueCrypt */
    4 /* Adapted for VeraCrypt */
    5 
    6 #ifdef TC_WINDOWS_BOOT
    7 #pragma optimize ("t", on)
    8 #endif
    9 
   10 #include "Serpent.h"
   11 #include "Common/Endian.h"
   12 
   13 #ifdef TC_WINDOWS_BOOT
   14 #include <stdlib.h>
   15 #pragma intrinsic(_lrotr,_lrotl)
   16 #define rotr32(x,n) _lrotr(x, n)
   17 #define rotl32(x,n) _lrotl(x, n)
   18 #else
   19 #include "Crypto/misc.h"
   20 #endif
   21 
   22 #if !defined(_UEFI)
   23 #include <memory.h>
   24 #endif // !defined(_UEFI)
   25 
   26 // linear transformation
      // Updates the four state words a, b, c, d in place using 32-bit left rotations
      // (rotl32), XORs and two left shifts. The `i` and `e` arguments exist only so
      // that LT can be invoked through the beforeS*/afterS* order macros; the body
      // never references them. Arguments are substituted unparenthesized and more
      // than once, so pass plain variable names only.
   27 #define LT(i,a,b,c,d,e) {\
   28     a = rotl32(a, 13);  \
   29     c = rotl32(c, 3);   \
   30     d = rotl32(d ^ c ^ (a << 3), 7);    \
   31     b = rotl32(b ^ a ^ c, 1);   \
   32     a = rotl32(a ^ b ^ d, 5);       \
   33     c = rotl32(c ^ d ^ (b << 7), 22);}
   34 
   35 // inverse linear transformation
      // Undoes LT: the rotations are applied to the right (rotr32) and the steps run
      // in the reverse order, again updating a, b, c, d in place. `i` and `e` are
      // unused placeholders for the beforeI*/afterI* order macros. Arguments are
      // substituted unparenthesized and more than once; pass plain variable names.
   36 #define ILT(i,a,b,c,d,e)    {\
   37     c = rotr32(c, 22);  \
   38     a = rotr32(a, 5);   \
   39     c ^= d ^ (b << 7);  \
   40     a ^= b ^ d;         \
   41     b = rotr32(b, 1);   \
   42     d = rotr32(d, 7) ^ c ^ (a << 3);    \
   43     b ^= a ^ c;         \
   44     c = rotr32(c, 3);   \
   45     a = rotr32(a, 13);}
   46 
   47 // order of output from S-box functions
      // Each entry calls f(index, v0, v1, v2, v3, v4) with the five working variables
      // a..e permuted: v0..v3 name the variables that currently hold the four state
      // words and v4 names the remaining variable. beforeS0 is the initial layout;
      // afterSn is the layout left behind by S-box n when it was invoked with the
      // layout of the preceding entry. The leading index (0..8) is what KX, LK and SK
      // use to select a four-word key group; the S-box and LT macros ignore it.
   48 #define beforeS0(f) f(0,a,b,c,d,e)
   49 #define afterS0(f) f(1,b,e,c,a,d)
   50 #define afterS1(f) f(2,c,b,a,e,d)
   51 #define afterS2(f) f(3,a,e,b,d,c)
   52 #define afterS3(f) f(4,e,b,d,c,a)
   53 #define afterS4(f) f(5,b,a,e,c,d)
   54 #define afterS5(f) f(6,a,c,b,e,d)
   55 #define afterS6(f) f(7,a,c,d,b,e)
   56 #define afterS7(f) f(8,d,e,b,a,c)
   57 
   58 // order of output from inverse S-box functions
      // Same scheme as the forward table, walked downwards: beforeI7 is the layout on
      // entry to I7 (index 8), and afterIn is the layout left behind by inverse
      // S-box n when it was invoked with the layout of the preceding entry. The
      // leading index (8..0) selects the key group for KX; I* and ILT ignore it.
   59 #define beforeI7(f) f(8,a,b,c,d,e)
   60 #define afterI7(f) f(7,d,a,b,e,c)
   61 #define afterI6(f) f(6,a,b,c,e,d)
   62 #define afterI5(f) f(5,b,d,e,c,a)
   63 #define afterI4(f) f(4,b,c,e,a,d)
   64 #define afterI3(f) f(3,a,b,e,c,d)
   65 #define afterI2(f) f(2,b,d,e,c,a)
   66 #define afterI1(f) f(1,a,b,c,e,d)
   67 #define afterI0(f) f(0,a,d,b,e,c)
   68 
   69 // The instruction sequences for the S-box functions
   70 // come from Dag Arne Osvik's paper "Speeding up Serpent".
   71 
      // S0: S-box 0 as a straight-line sequence of bitwise operations. r0..r3 are
      // the inputs; r4 is a scratch operand that is written before it is read. Every
      // step updates an operand in place, so statement order must not change. The
      // operands holding the result are listed by afterS0. `i` is unused.
   72 #define S0(i, r0, r1, r2, r3, r4) \
   73        {           \
   74     r3 ^= r0;   \
   75     r4 = r1;   \
   76     r1 &= r3;   \
   77     r4 ^= r2;   \
   78     r1 ^= r0;   \
   79     r0 |= r3;   \
   80     r0 ^= r4;   \
   81     r4 ^= r3;   \
   82     r3 ^= r2;   \
   83     r2 |= r1;   \
   84     r2 ^= r4;   \
   85     r4 = ~r4;      \
   86     r4 |= r1;   \
   87     r1 ^= r3;   \
   88     r1 ^= r4;   \
   89     r3 |= r0;   \
   90     r1 ^= r3;   \
   91     r4 ^= r3;   \
   92             }
   93 
      // I0: inverse of S-box 0 as a straight-line sequence of bitwise operations.
      // r0..r3 are the inputs; r4 is scratch and is written before it is read. Every
      // step updates an operand in place, so statement order must not change. The
      // operands holding the result are listed by afterI0. `i` is unused.
   94 #define I0(i, r0, r1, r2, r3, r4) \
   95        {           \
   96     r2 = ~r2;      \
   97     r4 = r1;   \
   98     r1 |= r0;   \
   99     r4 = ~r4;      \
  100     r1 ^= r2;   \
  101     r2 |= r4;   \
  102     r1 ^= r3;   \
  103     r0 ^= r4;   \
  104     r2 ^= r0;   \
  105     r0 &= r3;   \
  106     r4 ^= r0;   \
  107     r0 |= r1;   \
  108     r0 ^= r2;   \
  109     r3 ^= r4;   \
  110     r2 ^= r1;   \
  111     r3 ^= r0;   \
  112     r3 ^= r1;   \
  113     r2 &= r3;   \
  114     r4 ^= r2;   \
  115             }
  116 
      // S1: S-box 1 as a straight-line sequence of bitwise operations. r0..r3 are
      // the inputs; r4 is scratch and is written before it is read. Every step
      // updates an operand in place, so statement order must not change. The
      // operands holding the result are listed by afterS1. `i` is unused.
  117 #define S1(i, r0, r1, r2, r3, r4) \
  118        {           \
  119     r0 = ~r0;      \
  120     r2 = ~r2;      \
  121     r4 = r0;   \
  122     r0 &= r1;   \
  123     r2 ^= r0;   \
  124     r0 |= r3;   \
  125     r3 ^= r2;   \
  126     r1 ^= r0;   \
  127     r0 ^= r4;   \
  128     r4 |= r1;   \
  129     r1 ^= r3;   \
  130     r2 |= r0;   \
  131     r2 &= r4;   \
  132     r0 ^= r1;   \
  133     r1 &= r2;   \
  134     r1 ^= r0;   \
  135     r0 &= r2;   \
  136     r0 ^= r4;   \
  137             }
  138 
      // I1: inverse of S-box 1 as a straight-line sequence of bitwise operations.
      // r0..r3 are the inputs; r4 is scratch and is written before it is read. Every
      // step updates an operand in place, so statement order must not change. The
      // operands holding the result are listed by afterI1. `i` is unused.
  139 #define I1(i, r0, r1, r2, r3, r4) \
  140        {           \
  141     r4 = r1;   \
  142     r1 ^= r3;   \
  143     r3 &= r1;   \
  144     r4 ^= r2;   \
  145     r3 ^= r0;   \
  146     r0 |= r1;   \
  147     r2 ^= r3;   \
  148     r0 ^= r4;   \
  149     r0 |= r2;   \
  150     r1 ^= r3;   \
  151     r0 ^= r1;   \
  152     r1 |= r3;   \
  153     r1 ^= r0;   \
  154     r4 = ~r4;      \
  155     r4 ^= r1;   \
  156     r1 |= r0;   \
  157     r1 ^= r0;   \
  158     r1 |= r4;   \
  159     r3 ^= r1;   \
  160             }
  161 
      // S2: S-box 2 as a straight-line sequence of bitwise operations. r0..r3 are
      // the inputs; r4 is scratch and is written before it is read. Every step
      // updates an operand in place, so statement order must not change. The
      // operands holding the result are listed by afterS2. `i` is unused.
  162 #define S2(i, r0, r1, r2, r3, r4) \
  163        {           \
  164     r4 = r0;   \
  165     r0 &= r2;   \
  166     r0 ^= r3;   \
  167     r2 ^= r1;   \
  168     r2 ^= r0;   \
  169     r3 |= r4;   \
  170     r3 ^= r1;   \
  171     r4 ^= r2;   \
  172     r1 = r3;   \
  173     r3 |= r4;   \
  174     r3 ^= r0;   \
  175     r0 &= r1;   \
  176     r4 ^= r0;   \
  177     r1 ^= r3;   \
  178     r1 ^= r4;   \
  179     r4 = ~r4;      \
  180             }
  181 
      // I2: inverse of S-box 2 as a straight-line sequence of bitwise operations.
      // r0..r3 are the inputs; r4 is scratch and is written before it is read. Every
      // step updates an operand in place, so statement order must not change. The
      // operands holding the result are listed by afterI2. `i` is unused.
  182 #define I2(i, r0, r1, r2, r3, r4) \
  183        {           \
  184     r2 ^= r3;   \
  185     r3 ^= r0;   \
  186     r4 = r3;   \
  187     r3 &= r2;   \
  188     r3 ^= r1;   \
  189     r1 |= r2;   \
  190     r1 ^= r4;   \
  191     r4 &= r3;   \
  192     r2 ^= r3;   \
  193     r4 &= r0;   \
  194     r4 ^= r2;   \
  195     r2 &= r1;   \
  196     r2 |= r0;   \
  197     r3 = ~r3;      \
  198     r2 ^= r3;   \
  199     r0 ^= r3;   \
  200     r0 &= r1;   \
  201     r3 ^= r4;   \
  202     r3 ^= r0;   \
  203             }
  204 
      // S3: S-box 3 as a straight-line sequence of bitwise operations. r0..r3 are
      // the inputs; r4 is scratch and is written before it is read. Every step
      // updates an operand in place, so statement order must not change. The
      // operands holding the result are listed by afterS3. `i` is unused.
  205 #define S3(i, r0, r1, r2, r3, r4) \
  206        {           \
  207     r4 = r0;   \
  208     r0 |= r3;   \
  209     r3 ^= r1;   \
  210     r1 &= r4;   \
  211     r4 ^= r2;   \
  212     r2 ^= r3;   \
  213     r3 &= r0;   \
  214     r4 |= r1;   \
  215     r3 ^= r4;   \
  216     r0 ^= r1;   \
  217     r4 &= r0;   \
  218     r1 ^= r3;   \
  219     r4 ^= r2;   \
  220     r1 |= r0;   \
  221     r1 ^= r2;   \
  222     r0 ^= r3;   \
  223     r2 = r1;   \
  224     r1 |= r3;   \
  225     r1 ^= r0;   \
  226             }
  227 
      // I3: inverse of S-box 3 as a straight-line sequence of bitwise operations.
      // r0..r3 are the inputs; r4 is scratch and is written before it is read. Every
      // step updates an operand in place, so statement order must not change. The
      // operands holding the result are listed by afterI3. `i` is unused.
  228 #define I3(i, r0, r1, r2, r3, r4) \
  229        {           \
  230     r4 = r2;   \
  231     r2 ^= r1;   \
  232     r1 &= r2;   \
  233     r1 ^= r0;   \
  234     r0 &= r4;   \
  235     r4 ^= r3;   \
  236     r3 |= r1;   \
  237     r3 ^= r2;   \
  238     r0 ^= r4;   \
  239     r2 ^= r0;   \
  240     r0 |= r3;   \
  241     r0 ^= r1;   \
  242     r4 ^= r2;   \
  243     r2 &= r3;   \
  244     r1 |= r3;   \
  245     r1 ^= r2;   \
  246     r4 ^= r0;   \
  247     r2 ^= r4;   \
  248             }
  249 
      // S4: S-box 4 as a straight-line sequence of bitwise operations. r0..r3 are
      // the inputs; r4 is scratch and is written before it is read. Every step
      // updates an operand in place, so statement order must not change. The
      // operands holding the result are listed by afterS4. `i` is unused.
  250 #define S4(i, r0, r1, r2, r3, r4) \
  251        {           \
  252     r1 ^= r3;   \
  253     r3 = ~r3;      \
  254     r2 ^= r3;   \
  255     r3 ^= r0;   \
  256     r4 = r1;   \
  257     r1 &= r3;   \
  258     r1 ^= r2;   \
  259     r4 ^= r3;   \
  260     r0 ^= r4;   \
  261     r2 &= r4;   \
  262     r2 ^= r0;   \
  263     r0 &= r1;   \
  264     r3 ^= r0;   \
  265     r4 |= r1;   \
  266     r4 ^= r0;   \
  267     r0 |= r3;   \
  268     r0 ^= r2;   \
  269     r2 &= r3;   \
  270     r0 = ~r0;      \
  271     r4 ^= r2;   \
  272             }
  273 
      // I4: inverse of S-box 4 as a straight-line sequence of bitwise operations.
      // r0..r3 are the inputs; r4 is scratch and is written before it is read. Every
      // step updates an operand in place, so statement order must not change. The
      // operands holding the result are listed by afterI4. `i` is unused.
  274 #define I4(i, r0, r1, r2, r3, r4) \
  275        {           \
  276     r4 = r2;   \
  277     r2 &= r3;   \
  278     r2 ^= r1;   \
  279     r1 |= r3;   \
  280     r1 &= r0;   \
  281     r4 ^= r2;   \
  282     r4 ^= r1;   \
  283     r1 &= r2;   \
  284     r0 = ~r0;      \
  285     r3 ^= r4;   \
  286     r1 ^= r3;   \
  287     r3 &= r0;   \
  288     r3 ^= r2;   \
  289     r0 ^= r1;   \
  290     r2 &= r0;   \
  291     r3 ^= r0;   \
  292     r2 ^= r4;   \
  293     r2 |= r3;   \
  294     r3 ^= r0;   \
  295     r2 ^= r1;   \
  296             }
  297 
      // S5: S-box 5 as a straight-line sequence of bitwise operations. r0..r3 are
      // the inputs; r4 is scratch and is written before it is read. Every step
      // updates an operand in place, so statement order must not change. The
      // operands holding the result are listed by afterS5. `i` is unused.
  298 #define S5(i, r0, r1, r2, r3, r4) \
  299        {           \
  300     r0 ^= r1;   \
  301     r1 ^= r3;   \
  302     r3 = ~r3;      \
  303     r4 = r1;   \
  304     r1 &= r0;   \
  305     r2 ^= r3;   \
  306     r1 ^= r2;   \
  307     r2 |= r4;   \
  308     r4 ^= r3;   \
  309     r3 &= r1;   \
  310     r3 ^= r0;   \
  311     r4 ^= r1;   \
  312     r4 ^= r2;   \
  313     r2 ^= r0;   \
  314     r0 &= r3;   \
  315     r2 = ~r2;      \
  316     r0 ^= r4;   \
  317     r4 |= r3;   \
  318     r2 ^= r4;   \
  319             }
  320 
      // I5: inverse of S-box 5 as a straight-line sequence of bitwise operations.
      // r0..r3 are the inputs; r4 is scratch and is written before it is read. Every
      // step updates an operand in place, so statement order must not change. The
      // operands holding the result are listed by afterI5. `i` is unused.
  321 #define I5(i, r0, r1, r2, r3, r4) \
  322        {           \
  323     r1 = ~r1;      \
  324     r4 = r3;   \
  325     r2 ^= r1;   \
  326     r3 |= r0;   \
  327     r3 ^= r2;   \
  328     r2 |= r1;   \
  329     r2 &= r0;   \
  330     r4 ^= r3;   \
  331     r2 ^= r4;   \
  332     r4 |= r0;   \
  333     r4 ^= r1;   \
  334     r1 &= r2;   \
  335     r1 ^= r3;   \
  336     r4 ^= r2;   \
  337     r3 &= r4;   \
  338     r4 ^= r1;   \
  339     r3 ^= r0;   \
  340     r3 ^= r4;   \
  341     r4 = ~r4;      \
  342             }
  343 
      // S6: S-box 6 as a straight-line sequence of bitwise operations. r0..r3 are
      // the inputs; r4 is scratch and is written before it is read. Every step
      // updates an operand in place, so statement order must not change. The
      // operands holding the result are listed by afterS6. `i` is unused.
  344 #define S6(i, r0, r1, r2, r3, r4) \
  345        {           \
  346     r2 = ~r2;      \
  347     r4 = r3;   \
  348     r3 &= r0;   \
  349     r0 ^= r4;   \
  350     r3 ^= r2;   \
  351     r2 |= r4;   \
  352     r1 ^= r3;   \
  353     r2 ^= r0;   \
  354     r0 |= r1;   \
  355     r2 ^= r1;   \
  356     r4 ^= r0;   \
  357     r0 |= r3;   \
  358     r0 ^= r2;   \
  359     r4 ^= r3;   \
  360     r4 ^= r0;   \
  361     r3 = ~r3;      \
  362     r2 &= r4;   \
  363     r2 ^= r3;   \
  364             }
  365 
      // I6: inverse of S-box 6 as a straight-line sequence of bitwise operations.
      // r0..r3 are the inputs; r4 is scratch and is written before it is read. Every
      // step updates an operand in place, so statement order must not change. The
      // operands holding the result are listed by afterI6. `i` is unused.
  366 #define I6(i, r0, r1, r2, r3, r4) \
  367        {           \
  368     r0 ^= r2;   \
  369     r4 = r2;   \
  370     r2 &= r0;   \
  371     r4 ^= r3;   \
  372     r2 = ~r2;      \
  373     r3 ^= r1;   \
  374     r2 ^= r3;   \
  375     r4 |= r0;   \
  376     r0 ^= r2;   \
  377     r3 ^= r4;   \
  378     r4 ^= r1;   \
  379     r1 &= r3;   \
  380     r1 ^= r0;   \
  381     r0 ^= r3;   \
  382     r0 |= r2;   \
  383     r3 ^= r1;   \
  384     r4 ^= r0;   \
  385             }
  386 
      // S7: S-box 7 as a straight-line sequence of bitwise operations. r0..r3 are
      // the inputs; r4 is scratch and is written before it is read. Every step
      // updates an operand in place, so statement order must not change. The
      // operands holding the result are listed by afterS7. `i` is unused.
  387 #define S7(i, r0, r1, r2, r3, r4) \
  388        {           \
  389     r4 = r2;   \
  390     r2 &= r1;   \
  391     r2 ^= r3;   \
  392     r3 &= r1;   \
  393     r4 ^= r2;   \
  394     r2 ^= r1;   \
  395     r1 ^= r0;   \
  396     r0 |= r4;   \
  397     r0 ^= r2;   \
  398     r3 ^= r1;   \
  399     r2 ^= r3;   \
  400     r3 &= r0;   \
  401     r3 ^= r4;   \
  402     r4 ^= r2;   \
  403     r2 &= r0;   \
  404     r4 = ~r4;      \
  405     r2 ^= r4;   \
  406     r4 &= r0;   \
  407     r1 ^= r3;   \
  408     r4 ^= r1;   \
  409             }
  410 
      // I7: inverse of S-box 7 as a straight-line sequence of bitwise operations.
      // r0..r3 are the inputs; r4 is scratch and is written before it is read. Every
      // step updates an operand in place, so statement order must not change. The
      // operands holding the result are listed by afterI7. `i` is unused.
  411 #define I7(i, r0, r1, r2, r3, r4) \
  412        {           \
  413     r4 = r2;   \
  414     r2 ^= r0;   \
  415     r0 &= r3;   \
  416     r2 = ~r2;      \
  417     r4 |= r3;   \
  418     r3 ^= r1;   \
  419     r1 |= r0;   \
  420     r0 ^= r2;   \
  421     r2 &= r4;   \
  422     r1 ^= r2;   \
  423     r2 ^= r0;   \
  424     r0 |= r2;   \
  425     r3 &= r4;   \
  426     r0 ^= r3;   \
  427     r4 ^= r1;   \
  428     r3 ^= r4;   \
  429     r4 |= r0;   \
  430     r3 ^= r2;   \
  431     r4 ^= r2;   \
  432             }
  433 
  434 // key xor
      // XORs the four key words k[4*r] .. k[4*r + 3] into a, b, c, d. `k` is not a
      // parameter: it is taken from the scope in which the macro is expanded. `e` is
      // an unused placeholder for the order macros. `r` is substituted without
      // parentheses, so pass a literal or a plain identifier.
  435 #define KX(r, a, b, c, d, e)    {\
  436     a ^= k[4 * r + 0]; \
  437     b ^= k[4 * r + 1]; \
  438     c ^= k[4 * r + 2]; \
  439     d ^= k[4 * r + 3];}
  440 
  441 
  442 #ifdef TC_MINIMIZE_CODE_SIZE
  443 
      // Out-of-line form of the S0 macro for TC_MINIMIZE_CODE_SIZE builds: the same
      // statement sequence, applied through pointers. *r4 is overwritten before it
      // is read, so its incoming value is irrelevant.
      // NOTE(review): the sequence assumes the five pointers refer to distinct
      // objects, as they do at every call site visible in this listing.
  444 static void S0f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4)
  445 {
  446     *r3 ^= *r0;
  447     *r4 = *r1;
  448     *r1 &= *r3;
  449     *r4 ^= *r2;
  450     *r1 ^= *r0;
  451     *r0 |= *r3;
  452     *r0 ^= *r4;
  453     *r4 ^= *r3;
  454     *r3 ^= *r2;
  455     *r2 |= *r1;
  456     *r2 ^= *r4;
  457     *r4 = ~*r4;
  458     *r4 |= *r1;
  459     *r1 ^= *r3;
  460     *r1 ^= *r4;
  461     *r3 |= *r0;
  462     *r1 ^= *r3;
  463     *r4 ^= *r3;
  464 }
  465 
      // Out-of-line form of the S1 macro for TC_MINIMIZE_CODE_SIZE builds: the same
      // statement sequence, applied through pointers. *r4 is overwritten before it
      // is read, so its incoming value is irrelevant.
      // NOTE(review): the sequence assumes the five pointers refer to distinct
      // objects, as they do at every call site visible in this listing.
  466 static void S1f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4)
  467 {
  468     *r0 = ~*r0;
  469     *r2 = ~*r2;
  470     *r4 = *r0;
  471     *r0 &= *r1;
  472     *r2 ^= *r0;
  473     *r0 |= *r3;
  474     *r3 ^= *r2;
  475     *r1 ^= *r0;
  476     *r0 ^= *r4;
  477     *r4 |= *r1;
  478     *r1 ^= *r3;
  479     *r2 |= *r0;
  480     *r2 &= *r4;
  481     *r0 ^= *r1;
  482     *r1 &= *r2;
  483     *r1 ^= *r0;
  484     *r0 &= *r2;
  485     *r0 ^= *r4;
  486 }
  487 
      // Out-of-line form of the S2 macro for TC_MINIMIZE_CODE_SIZE builds: the same
      // statement sequence, applied through pointers. *r4 is overwritten before it
      // is read, so its incoming value is irrelevant.
      // NOTE(review): the sequence assumes the five pointers refer to distinct
      // objects, as they do at every call site visible in this listing.
  488 static void S2f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4)
  489 {
  490     *r4 = *r0;
  491     *r0 &= *r2;
  492     *r0 ^= *r3;
  493     *r2 ^= *r1;
  494     *r2 ^= *r0;
  495     *r3 |= *r4;
  496     *r3 ^= *r1;
  497     *r4 ^= *r2;
  498     *r1 = *r3;
  499     *r3 |= *r4;
  500     *r3 ^= *r0;
  501     *r0 &= *r1;
  502     *r4 ^= *r0;
  503     *r1 ^= *r3;
  504     *r1 ^= *r4;
  505     *r4 = ~*r4;
  506 }
  507 
      // Out-of-line form of the S3 macro for TC_MINIMIZE_CODE_SIZE builds: the same
      // statement sequence, applied through pointers. *r4 is overwritten before it
      // is read, so its incoming value is irrelevant.
      // NOTE(review): the sequence assumes the five pointers refer to distinct
      // objects, as they do at every call site visible in this listing.
  508 static void S3f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4)
  509 {
  510     *r4 = *r0;
  511     *r0 |= *r3;
  512     *r3 ^= *r1;
  513     *r1 &= *r4;
  514     *r4 ^= *r2;
  515     *r2 ^= *r3;
  516     *r3 &= *r0;
  517     *r4 |= *r1;
  518     *r3 ^= *r4;
  519     *r0 ^= *r1;
  520     *r4 &= *r0;
  521     *r1 ^= *r3;
  522     *r4 ^= *r2;
  523     *r1 |= *r0;
  524     *r1 ^= *r2;
  525     *r0 ^= *r3;
  526     *r2 = *r1;
  527     *r1 |= *r3;
  528     *r1 ^= *r0;
  529 }
  530 
      // Out-of-line form of the S4 macro for TC_MINIMIZE_CODE_SIZE builds: the same
      // statement sequence, applied through pointers. *r4 is overwritten before it
      // is read, so its incoming value is irrelevant.
      // NOTE(review): the sequence assumes the five pointers refer to distinct
      // objects, as they do at every call site visible in this listing.
  531 static void S4f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4)
  532 {
  533     *r1 ^= *r3;
  534     *r3 = ~*r3;
  535     *r2 ^= *r3;
  536     *r3 ^= *r0;
  537     *r4 = *r1;
  538     *r1 &= *r3;
  539     *r1 ^= *r2;
  540     *r4 ^= *r3;
  541     *r0 ^= *r4;
  542     *r2 &= *r4;
  543     *r2 ^= *r0;
  544     *r0 &= *r1;
  545     *r3 ^= *r0;
  546     *r4 |= *r1;
  547     *r4 ^= *r0;
  548     *r0 |= *r3;
  549     *r0 ^= *r2;
  550     *r2 &= *r3;
  551     *r0 = ~*r0;
  552     *r4 ^= *r2;
  553 }
  554 
      // Out-of-line form of the S5 macro for TC_MINIMIZE_CODE_SIZE builds: the same
      // statement sequence, applied through pointers. *r4 is overwritten before it
      // is read, so its incoming value is irrelevant.
      // NOTE(review): the sequence assumes the five pointers refer to distinct
      // objects, as they do at every call site visible in this listing.
  555 static void S5f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4)
  556 {
  557     *r0 ^= *r1;
  558     *r1 ^= *r3;
  559     *r3 = ~*r3;
  560     *r4 = *r1;
  561     *r1 &= *r0;
  562     *r2 ^= *r3;
  563     *r1 ^= *r2;
  564     *r2 |= *r4;
  565     *r4 ^= *r3;
  566     *r3 &= *r1;
  567     *r3 ^= *r0;
  568     *r4 ^= *r1;
  569     *r4 ^= *r2;
  570     *r2 ^= *r0;
  571     *r0 &= *r3;
  572     *r2 = ~*r2;
  573     *r0 ^= *r4;
  574     *r4 |= *r3;
  575     *r2 ^= *r4;
  576 }
  577 
      // Out-of-line form of the S6 macro for TC_MINIMIZE_CODE_SIZE builds: the same
      // statement sequence, applied through pointers. *r4 is overwritten before it
      // is read, so its incoming value is irrelevant.
      // NOTE(review): the sequence assumes the five pointers refer to distinct
      // objects, as they do at every call site visible in this listing.
  578 static void S6f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4)
  579 {
  580     *r2 = ~*r2;
  581     *r4 = *r3;
  582     *r3 &= *r0;
  583     *r0 ^= *r4;
  584     *r3 ^= *r2;
  585     *r2 |= *r4;
  586     *r1 ^= *r3;
  587     *r2 ^= *r0;
  588     *r0 |= *r1;
  589     *r2 ^= *r1;
  590     *r4 ^= *r0;
  591     *r0 |= *r3;
  592     *r0 ^= *r2;
  593     *r4 ^= *r3;
  594     *r4 ^= *r0;
  595     *r3 = ~*r3;
  596     *r2 &= *r4;
  597     *r2 ^= *r3;
  598 }
  599 
      // Out-of-line form of the S7 macro for TC_MINIMIZE_CODE_SIZE builds: the same
      // statement sequence, applied through pointers. *r4 is overwritten before it
      // is read, so its incoming value is irrelevant.
      // NOTE(review): the sequence assumes the five pointers refer to distinct
      // objects, as they do at every call site visible in this listing.
  600 static void S7f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4)
  601 {
  602     *r4 = *r2;
  603     *r2 &= *r1;
  604     *r2 ^= *r3;
  605     *r3 &= *r1;
  606     *r4 ^= *r2;
  607     *r2 ^= *r1;
  608     *r1 ^= *r0;
  609     *r0 |= *r4;
  610     *r0 ^= *r2;
  611     *r3 ^= *r1;
  612     *r2 ^= *r3;
  613     *r3 &= *r0;
  614     *r3 ^= *r4;
  615     *r4 ^= *r2;
  616     *r2 &= *r0;
  617     *r4 = ~*r4;
  618     *r2 ^= *r4;
  619     *r4 &= *r0;
  620     *r1 ^= *r3;
  621     *r4 ^= *r1;
  622 }
  623 
  624 static void KXf (const unsigned __int32 *k, unsigned int r, unsigned __int32 *a, unsigned __int32 *b, unsigned __int32 *c, unsigned __int32 *d)
  625 {
  626     *a ^= k[r];
  627     *b ^= k[r + 1];
  628     *c ^= k[r + 2];
  629     *d ^= k[r + 3];
  630 }
  631 
  632 #endif // TC_MINIMIZE_CODE_SIZE
  633 
  634 #ifndef TC_MINIMIZE_CODE_SIZE
  635 
  636 void serpent_set_key(const unsigned __int8 userKey[],unsigned __int8 *ks)
  637 {
  638     unsigned __int32 a,b,c,d,e;
  639     unsigned __int32 *k = (unsigned __int32 *)ks;
  640     unsigned __int32 t;
  641     int i;
  642 
  643     for (i = 0; i < 8; i++)
  644         k[i] = LE32(((unsigned __int32*)userKey)[i]);
  645 
  646     k += 8;
  647     t = k[-1];
  648     for (i = 0; i < 132; ++i)
  649         k[i] = t = rotl32(k[i-8] ^ k[i-5] ^ k[i-3] ^ t ^ 0x9e3779b9 ^ i, 11);
  650     k -= 20;
  651 
  652 #define LK(r, a, b, c, d, e)    {\
  653     a = k[(8-r)*4 + 0];     \
  654     b = k[(8-r)*4 + 1];     \
  655     c = k[(8-r)*4 + 2];     \
  656     d = k[(8-r)*4 + 3];}
  657 
  658 #define SK(r, a, b, c, d, e)    {\
  659     k[(8-r)*4 + 4] = a;     \
  660     k[(8-r)*4 + 5] = b;     \
  661     k[(8-r)*4 + 6] = c;     \
  662     k[(8-r)*4 + 7] = d;}    \
  663 
  664     for (i=0; i<4; i++)
  665     {
  666         afterS2(LK); afterS2(S3); afterS3(SK);
  667         afterS1(LK); afterS1(S2); afterS2(SK);
  668         afterS0(LK); afterS0(S1); afterS1(SK);
  669         beforeS0(LK); beforeS0(S0); afterS0(SK);
  670         k += 8*4;
  671         afterS6(LK); afterS6(S7); afterS7(SK);
  672         afterS5(LK); afterS5(S6); afterS6(SK);
  673         afterS4(LK); afterS4(S5); afterS5(SK);
  674         afterS3(LK); afterS3(S4); afterS4(SK);
  675     }
  676     afterS2(LK); afterS2(S3); afterS3(SK);
  677 }
  678 
  679 #else // TC_MINIMIZE_CODE_SIZE
  680 
  681 static void LKf (unsigned __int32 *k, unsigned int r, unsigned __int32 *a, unsigned __int32 *b, unsigned __int32 *c, unsigned __int32 *d)
  682 {
  683     *a = k[r];
  684     *b = k[r + 1];
  685     *c = k[r + 2];
  686     *d = k[r + 3];
  687 }
  688 
  689 static void SKf (unsigned __int32 *k, unsigned int r, unsigned __int32 *a, unsigned __int32 *b, unsigned __int32 *c, unsigned __int32 *d)
  690 {
  691     k[r + 4] = *a;
  692     k[r + 5] = *b;
  693     k[r + 6] = *c;
  694     k[r + 7] = *d;
  695 }
  696 
  697 void serpent_set_key(const unsigned __int8 userKey[], unsigned __int8 *ks)
  698 {
  699     unsigned __int32 a,b,c,d,e;
  700     unsigned __int32 *k = (unsigned __int32 *)ks;
  701     unsigned __int32 t;
  702     int i;
  703 
  704     for (i = 0; i < 8; i++)
  705         k[i] = LE32(((unsigned __int32*)userKey)[i]);
  706 
  707     k += 8;
  708     t = k[-1];
  709     for (i = 0; i < 132; ++i)
  710         k[i] = t = rotl32(k[i-8] ^ k[i-5] ^ k[i-3] ^ t ^ 0x9e3779b9 ^ i, 11);
  711     k -= 20;
  712 
  713     for (i=0; i<4; i++)
  714     {
  715         LKf (k, 20, &a, &e, &b, &d); S3f (&a, &e, &b, &d, &c); SKf (k, 16, &e, &b, &d, &c);
  716         LKf (k, 24, &c, &b, &a, &e); S2f (&c, &b, &a, &e, &d); SKf (k, 20, &a, &e, &b, &d);
  717         LKf (k, 28, &b, &e, &c, &a); S1f (&b, &e, &c, &a, &d); SKf (k, 24, &c, &b, &a, &e);
  718         LKf (k, 32, &a, &b, &c, &d); S0f (&a, &b, &c, &d, &e); SKf (k, 28, &b, &e, &c, &a);
  719         k += 8*4;
  720         LKf (k,  4, &a, &c, &d, &b); S7f (&a, &c, &d, &b, &e); SKf (k,  0, &d, &e, &b, &a);
  721         LKf (k,  8, &a, &c, &b, &e); S6f (&a, &c, &b, &e, &d); SKf (k,  4, &a, &c, &d, &b);
  722         LKf (k, 12, &b, &a, &e, &c); S5f (&b, &a, &e, &c, &d); SKf (k,  8, &a, &c, &b, &e);
  723         LKf (k, 16, &e, &b, &d, &c); S4f (&e, &b, &d, &c, &a); SKf (k, 12, &b, &a, &e, &c);
  724     }
  725     LKf (k, 20, &a, &e, &b, &d); S3f (&a, &e, &b, &d, &c); SKf (k, 16, &e, &b, &d, &c);
  726 }
  727 
  728 #endif // TC_MINIMIZE_CODE_SIZE
  729 
  730 
  731 #ifndef TC_MINIMIZE_CODE_SIZE
  732 
  733 void serpent_encrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks)
  734 {
  735     unsigned __int32 a, b, c, d, e;
  736     unsigned int i=1;
  737     const unsigned __int32 *k = (unsigned __int32 *)ks + 8;
  738     unsigned __int32 *in = (unsigned __int32 *) inBlock;
  739     unsigned __int32 *out = (unsigned __int32 *) outBlock;
  740 
  741     a = LE32(in[0]);
  742     b = LE32(in[1]);
  743     c = LE32(in[2]);
  744     d = LE32(in[3]);
  745 
  746     do
  747     {
  748         beforeS0(KX); beforeS0(S0); afterS0(LT);
  749         afterS0(KX); afterS0(S1); afterS1(LT);
  750         afterS1(KX); afterS1(S2); afterS2(LT);
  751         afterS2(KX); afterS2(S3); afterS3(LT);
  752         afterS3(KX); afterS3(S4); afterS4(LT);
  753         afterS4(KX); afterS4(S5); afterS5(LT);
  754         afterS5(KX); afterS5(S6); afterS6(LT);
  755         afterS6(KX); afterS6(S7);
  756 
  757         if (i == 4)
  758             break;
  759 
  760         ++i;
  761         c = b;
  762         b = e;
  763         e = d;
  764         d = a;
  765         a = e;
  766         k += 32;
  767         beforeS0(LT);
  768     }
  769     while (1);
  770 
  771     afterS7(KX);
  772 
  773     out[0] = LE32(d);
  774     out[1] = LE32(e);
  775     out[2] = LE32(b);
  776     out[3] = LE32(a);
  777 }
  778 
  779 #else // TC_MINIMIZE_CODE_SIZE
  780 
      // Short alias used by the LTf and ILTf helper signatures in
      // TC_MINIMIZE_CODE_SIZE builds.
  781 typedef unsigned __int32 uint32;
  782 
      // Out-of-line form of the LT macro: applies the linear transformation to the
      // four state words in place through pointers, with the same statement order.
      // NOTE(review): assumes the four pointers refer to distinct objects, as they
      // do at every call site visible in this listing.
  783 static void LTf (uint32 *a, uint32 *b, uint32 *c, uint32 *d)
  784 {
  785     *a = rotl32(*a, 13);
  786     *c = rotl32(*c, 3);
  787     *d = rotl32(*d ^ *c ^ (*a << 3), 7);
  788     *b = rotl32(*b ^ *a ^ *c, 1);
  789     *a = rotl32(*a ^ *b ^ *d, 5);
  790     *c = rotl32(*c ^ *d ^ (*b << 7), 22);
  791 }
  792 
  793 void serpent_encrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks)
  794 {
  795     unsigned __int32 a, b, c, d, e;
  796     unsigned int i=1;
  797     const unsigned __int32 *k = (unsigned __int32 *)ks + 8;
  798     unsigned __int32 *in = (unsigned __int32 *) inBlock;
  799     unsigned __int32 *out = (unsigned __int32 *) outBlock;
  800 
  801     a = LE32(in[0]);
  802     b = LE32(in[1]);
  803     c = LE32(in[2]);
  804     d = LE32(in[3]);
  805 
  806     do
  807     {
  808         KXf (k,  0, &a, &b, &c, &d); S0f (&a, &b, &c, &d, &e); LTf (&b, &e, &c, &a);
  809         KXf (k,  4, &b, &e, &c, &a); S1f (&b, &e, &c, &a, &d); LTf (&c, &b, &a, &e);
  810         KXf (k,  8, &c, &b, &a, &e); S2f (&c, &b, &a, &e, &d); LTf (&a, &e, &b, &d);
  811         KXf (k, 12, &a, &e, &b, &d); S3f (&a, &e, &b, &d, &c); LTf (&e, &b, &d, &c);
  812         KXf (k, 16, &e, &b, &d, &c); S4f (&e, &b, &d, &c, &a); LTf (&b, &a, &e, &c);
  813         KXf (k, 20, &b, &a, &e, &c); S5f (&b, &a, &e, &c, &d); LTf (&a, &c, &b, &e);
  814         KXf (k, 24, &a, &c, &b, &e); S6f (&a, &c, &b, &e, &d); LTf (&a, &c, &d, &b);
  815         KXf (k, 28, &a, &c, &d, &b); S7f (&a, &c, &d, &b, &e);
  816 
  817         if (i == 4)
  818             break;
  819 
  820         ++i;
  821         c = b;
  822         b = e;
  823         e = d;
  824         d = a;
  825         a = e;
  826         k += 32;
  827         LTf (&a,&b,&c,&d);
  828     }
  829     while (1);
  830 
  831     KXf (k, 32, &d, &e, &b, &a);
  832 
  833     out[0] = LE32(d);
  834     out[1] = LE32(e);
  835     out[2] = LE32(b);
  836     out[3] = LE32(a);
  837 }
  838 
  839 #endif // TC_MINIMIZE_CODE_SIZE
  840 
  841 #if !defined (TC_MINIMIZE_CODE_SIZE)
  842 
  843 void serpent_decrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks)
  844 {
  845     unsigned __int32 a, b, c, d, e;
  846     const unsigned __int32 *k = (unsigned __int32 *)ks + 104;
  847     unsigned int i=4;
  848     unsigned __int32 *in = (unsigned __int32 *) inBlock;
  849     unsigned __int32 *out = (unsigned __int32 *) outBlock;
  850 
  851     a = LE32(in[0]);
  852     b = LE32(in[1]);
  853     c = LE32(in[2]);
  854     d = LE32(in[3]);
  855 
  856     beforeI7(KX);
  857     goto start;
  858 
  859     do
  860     {
  861         c = b;
  862         b = d;
  863         d = e;
  864         k -= 32;
  865         beforeI7(ILT);
  866 start:
  867         beforeI7(I7); afterI7(KX);
  868         afterI7(ILT); afterI7(I6); afterI6(KX);
  869         afterI6(ILT); afterI6(I5); afterI5(KX);
  870         afterI5(ILT); afterI5(I4); afterI4(KX);
  871         afterI4(ILT); afterI4(I3); afterI3(KX);
  872         afterI3(ILT); afterI3(I2); afterI2(KX);
  873         afterI2(ILT); afterI2(I1); afterI1(KX);
  874         afterI1(ILT); afterI1(I0); afterI0(KX);
  875     }
  876     while (--i != 0);
  877 
  878     out[0] = LE32(a);
  879     out[1] = LE32(d);
  880     out[2] = LE32(b);
  881     out[3] = LE32(e);
  882 }
  883 
  884 #else // TC_MINIMIZE_CODE_SIZE
  885 
      // Out-of-line form of the ILT macro for TC_MINIMIZE_CODE_SIZE builds: applies
      // the inverse linear transformation to the four state words in place through
      // pointers, with the same statement order as the macro.
      // NOTE(review): assumes the four pointers refer to distinct objects, as they
      // do at every call site visible in this listing.
  886 static void ILTf (uint32 *a, uint32 *b, uint32 *c, uint32 *d)
  887 {
  888     *c = rotr32(*c, 22);
  889     *a = rotr32(*a, 5);
  890     *c ^= *d ^ (*b << 7);
  891     *a ^= *b ^ *d;
  892     *b = rotr32(*b, 1);
  893     *d = rotr32(*d, 7) ^ *c ^ (*a << 3);
  894     *b ^= *a ^ *c;
  895     *c = rotr32(*c, 3);
  896     *a = rotr32(*a, 13);
  897 }
  898 
  899 void serpent_decrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks)
  900 {
  901     unsigned __int32 a, b, c, d, e;
  902     const unsigned __int32 *k = (unsigned __int32 *)ks + 104;
  903     unsigned int i=4;
  904     unsigned __int32 *in = (unsigned __int32 *) inBlock;
  905     unsigned __int32 *out = (unsigned __int32 *) outBlock;
  906 
  907     a = LE32(in[0]);
  908     b = LE32(in[1]);
  909     c = LE32(in[2]);
  910     d = LE32(in[3]);
  911 
  912     KXf (k, 32, &a, &b, &c, &d);
  913     goto start;
  914 
  915     do
  916     {
  917         c = b;
  918         b = d;
  919         d = e;
  920         k -= 32;
  921         beforeI7(ILT);
  922 start:
  923         beforeI7(I7); KXf (k, 28, &d, &a, &b, &e);
  924         ILTf (&d, &a, &b, &e); afterI7(I6); KXf (k, 24, &a, &b, &c, &e);
  925         ILTf (&a, &b, &c, &e); afterI6(I5); KXf (k, 20, &b, &d, &e, &c);
  926         ILTf (&b, &d, &e, &c); afterI5(I4); KXf (k, 16, &b, &c, &e, &a);
  927         ILTf (&b, &c, &e, &a); afterI4(I3); KXf (k, 12, &a, &b, &e, &c);
  928         ILTf (&a, &b, &e, &c); afterI3(I2); KXf (k, 8,  &b, &d, &e, &c);
  929         ILTf (&b, &d, &e, &c); afterI2(I1); KXf (k, 4,  &a, &b, &c, &e);
  930         ILTf (&a, &b, &c, &e); afterI1(I0); KXf (k, 0,  &a, &d, &b, &e);
  931     }
  932     while (--i != 0);
  933 
  934     out[0] = LE32(a);
  935     out[1] = LE32(d);
  936     out[2] = LE32(b);
  937     out[3] = LE32(e);
  938 }
  939 
  940 #endif // TC_MINIMIZE_CODE_SIZE