"Fossies" - the Fresh Open Source Software Archive

Member "hashcat-6.2.6/OpenCL/m01500_a3-pure.cl" (2 Sep 2022, 126965 Bytes) of package /linux/privat/hashcat-6.2.6.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Lisp source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively you can here view or download the uninterpreted source code file. See also the latest Fossies "Diffs" side-by-side code changes report for "m01500_a3-pure.cl": 6.2.5_vs_6.2.6.

A hint: This file contains one or more very long lines, so maybe it is better readable using the pure text view mode that shows the contents as wrapped lines within the browser window.


    1 /**
    2  * Author......: See docs/credits.txt
    3  * License.....: MIT
    4  * NOTE........: sboxes for maxwell were taken from DeepLearningJohnDoe, license below
    5  *             : sboxes for others were taken from JtR, license below
    6  */
    7 
    8 #ifdef KERNEL_STATIC
    9 #include M2S(INCLUDE_PATH/inc_vendor.h)
   10 #include M2S(INCLUDE_PATH/inc_types.h)
   11 #include M2S(INCLUDE_PATH/inc_platform.cl)
   12 #include M2S(INCLUDE_PATH/inc_common.cl)
   13 #endif
   14 
   15 #define COMPARE_S M2S(INCLUDE_PATH/inc_comp_single_bs.cl) // path of inc_comp_single_bs.cl wrapped in M2S, like the includes above; presumably the single-hash compare code, TODO confirm at the include site (not in this part of the file)
   16 #define COMPARE_M M2S(INCLUDE_PATH/inc_comp_multi_bs.cl) // same construction for inc_comp_multi_bs.cl; presumably the multi-hash counterpart, TODO confirm
   17 
   18 #ifdef IS_NV
      // KXX_DECL expands to nothing on each of the three vendor branches below
      // (IS_NV, IS_AMD or IS_HIP, IS_GENERIC). The definitions are token-identical,
      // so having more than one branch active is a harmless redefinition.
      // Where KXX_DECL is consumed is not visible in this part of the file.
   19 #define KXX_DECL
   20 #endif
   21 
   22 #if (defined IS_AMD || defined IS_HIP)
   23 #define KXX_DECL
   24 #endif
   25 
   26 #ifdef IS_GENERIC
   27 #define KXX_DECL
   28 #endif
   29 
   30 #ifdef IS_NV
   31 
   32 #if CUDA_ARCH >= 500
   33 
   34 //
   35 // Bitslice DES S-boxes with LOP3.LUT instructions
   36 // For NVIDIA Maxwell architecture and CUDA 7.5 RC
   37 // by DeepLearningJohnDoe, version 0.1.6, 2015/07/19
   38 //
   39 // Gate counts: 25 24 25 18 25 24 24 23
   40 // Average: 23.5
   41 // Depth: 8 7 7 6 8 10 10 8
   42 // Average: 8
   43 //
   44 // Note that same S-box function with a lower gate count isn't necessarily faster.
   45 //
   46 // These Boolean expressions corresponding to DES S-boxes were
   47 // discovered by <deeplearningjohndoe at gmail.com>
   48 //
   49 // This file itself is Copyright (c) 2015 by <deeplearningjohndoe at gmail.com>
   50 // Redistribution and use in source and binary forms, with or without
   51 // modification, are permitted.
   52 //
   53 // The underlying mathematical formulas are NOT copyrighted.
   54 //
   55 
   56 #define LUT(a,b,c,d,e) u32 a; asm ("lop3.b32 %0, %1, %2, %3, "#e";" : "=r"(a): "r"(b), "r"(c), "r"(d));
      // LUT (a, b, c, d, e): declares a new u32 named a and fills it with a single
      // lop3.b32 instruction whose three inputs are b, c and d.
      // e is stringized (#e) directly into the instruction text as its last operand,
      // so it has to be written as a literal such as 0xC1 (the lookup-table immediate
      // of lop3, per the PTX ISA).
      // The expansion is a declaration followed by an asm statement that already ends
      // in a semicolon: call sites therefore carry no semicolon of their own, and a
      // given name a can be introduced only once per scope.
   57 
   58 DECLSPEC void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
   59 {
        // DES S-box 1 in bitslice form, written as 25 LUT gates (one lop3.b32 each).
        // a1..a6 are the six input words, out1..out4 the four output words. Every gate
        // is bitwise, so each bit position of the u32 words is processed independently.
        // The results are XORed into *out1..*out4 rather than stored, so whatever the
        // caller left there is combined with the S-box output.
        // Temporary names look like the 64-bit truth table of each intermediate
        // (presumably over a1..a6 - not verified here); x1..x4 are the final outputs.
        // Each LUT line declares its result, so a gate can only use names above it.
   60   LUT (xAA55AA5500550055, a1, a4, a6, 0xC1)
   61   LUT (xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E)
   62   LUT (x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6)
   63   LUT (xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56)
   64   LUT (x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C)
   65   LUT (x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B)
   66   LUT (xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6)
   67   LUT (x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A)
   68   LUT (x55EE55EE55EE55EE, a1, a2, a4, 0x7A)
   69   LUT (x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9)
   70   LUT (x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72)
   71   LUT (xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29)
   72   LUT (x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95)
   73   LUT (x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6)
   74   LUT (xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2)
   75   LUT (x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90)
   76   LUT (x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76)
   77   LUT (x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80)
   78   LUT (x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6)
   79   LUT (x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6)
   80   LUT (x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21)
   81   LUT (x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A)
   82   LUT (x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70)
   83   LUT (xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97)
   84   LUT (x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C)
   85 
        // Fold the four result words into the caller's accumulators.
   86   *out1 ^= x1;
   87   *out2 ^= x2;
   88   *out3 ^= x3;
   89   *out4 ^= x4;
   90 }
   91 
   92 DECLSPEC void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
   93 {
        // DES S-box 2 in bitslice form, written as 24 LUT gates (one lop3.b32 each).
        // a1..a6 are the six input words, out1..out4 the four output words. Every gate
        // is bitwise, so each bit position of the u32 words is processed independently.
        // The results are XORed into *out1..*out4 rather than stored.
        // Temporary names look like the 64-bit truth table of each intermediate
        // (presumably over a1..a6 - not verified here); x1..x4 are the final outputs.
   94   LUT (xEEEEEEEE99999999, a1, a2, a6, 0x97)
   95   LUT (xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67)
   96   LUT (x5555FFFFFFFF0000, a1, a5, a6, 0x76)
   97   LUT (x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69)
   98   LUT (x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A)
   99   LUT (xCFCF3030CFCF3030, a2, a3, a5, 0x65)
  100   LUT (xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D)
  101   LUT (xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA)
  102   LUT (x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6)
  103   LUT (x3333CCCC00000000, a2, a5, a6, 0x14)
  104   LUT (xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5)
  105   LUT (x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C)
  106   LUT (x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96)
  107   LUT (x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A)
  108   LUT (x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE)
  109   LUT (x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90)
  110   LUT (x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79)
  111   LUT (x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29)
  112   LUT (x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6)
        // a1 is passed twice to the next gate, so it depends on only two distinct inputs.
  113   LUT (xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A)
  114   LUT (xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF)
  115   LUT (xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D)
  116   LUT (xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B)
  117   LUT (x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C)
  118 
        // Fold the four result words into the caller's accumulators.
  119   *out1 ^= x1;
  120   *out2 ^= x2;
  121   *out3 ^= x3;
  122   *out4 ^= x4;
  123 }
  124 
  125 DECLSPEC void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
  126 {
        // DES S-box 3 in bitslice form, written as 25 LUT gates (one lop3.b32 each).
        // a1..a6 are the six input words, out1..out4 the four output words. Every gate
        // is bitwise, so each bit position of the u32 words is processed independently.
        // The results are XORed into *out1..*out4 rather than stored.
        // Temporary names look like the 64-bit truth table of each intermediate
        // (presumably over a1..a6 - not verified here); x1..x4 are the final outputs.
        // In this S-box a2 is used only by the four gates that produce x1..x4.
  127   LUT (xA50FA50FA50FA50F, a1, a3, a4, 0xC9)
  128   LUT (xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B)
  129   LUT (xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D)
  130   LUT (x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69)
  131   LUT (xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6)
  132   LUT (x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C)
  133   LUT (x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6)
  134   LUT (xAA55AA5500AA00AA, a1, a4, a6, 0x49)
  135   LUT (xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B)
  136   LUT (x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66)
  137   LUT (xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F)
  138   LUT (xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB)
  139   LUT (x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C)
  140   LUT (x500F500F500F500F, a1, a3, a4, 0x98)
  141   LUT (xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D)
  142   LUT (xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A)
  143   LUT (xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2)
  144   LUT (xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D)
  145   LUT (x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6)
  146   LUT (x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6)
  147   LUT (x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6)
  148   LUT (x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB)
  149   LUT (xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9)
  150   LUT (x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B)
  151   LUT (x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6)
  152 
        // Fold the four result words into the caller's accumulators.
  153   *out1 ^= x1;
  154   *out2 ^= x2;
  155   *out3 ^= x3;
  156   *out4 ^= x4;
  157 }
  158 
  159 DECLSPEC void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
  160 {
        // DES S-box 4 in bitslice form, written as 18 LUT gates (one lop3.b32 each).
        // a1..a6 are the six input words, out1..out4 the four output words. Every gate
        // is bitwise, so each bit position of the u32 words is processed independently.
        // The results are XORed into *out1..*out4 rather than stored.
        // Temporary names look like the 64-bit truth table of each intermediate
        // (presumably over a1..a6 - not verified here); x1..x4 are the final outputs.
        // a6 is used only by the four final gates. x4 and x3 are computed from the same
        // three operands with different immediates, and so are x2 and x1.
  161   LUT (x55F055F055F055F0, a1, a3, a4, 0x72)
  162   LUT (xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD)
  163   LUT (xF50AF50AF50AF50A, a1, a3, a4, 0x59)
  164   LUT (xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7)
  165   LUT (x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6)
  166   LUT (x9999666699996666, a1, a2, a5, 0x69)
  167   LUT (x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18)
  168   LUT (xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63)
  169   LUT (x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A)
  170   LUT (x4848484848484848, a1, a2, a3, 0x12)
  171   LUT (x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28)
  172   LUT (x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E)
  173   LUT (x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14)
  174   LUT (x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78)
  175   LUT (x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6)
  176   LUT (x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A)
  177   LUT (x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9)
  178   LUT (x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56)
  179 
        // Fold the four result words into the caller's accumulators.
  180   *out1 ^= x1;
  181   *out2 ^= x2;
  182   *out3 ^= x3;
  183   *out4 ^= x4;
  184 }
  185 
  186 DECLSPEC void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
  187 {
        // DES S-box 5 in bitslice form, written as 25 LUT gates (one lop3.b32 each).
        // a1..a6 are the six input words, out1..out4 the four output words. Every gate
        // is bitwise, so each bit position of the u32 words is processed independently.
        // The results are XORed into *out1..*out4 rather than stored.
        // Temporary names look like the 64-bit truth table of each intermediate
        // (presumably over a1..a6 - not verified here); x1..x4 are the final outputs.
        // In this S-box a4 is used only by the four gates that produce x1..x4.
  188   LUT (xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB)
  189   LUT (xFFFF00005555FFFF, a1, a5, a6, 0xB9)
  190   LUT (xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8)
  191   LUT (x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34)
  192   LUT (xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE)
  193   LUT (x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29)
  194   LUT (x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6)
  195   LUT (xA5A50A0AA5A50A0A, a1, a3, a5, 0x49)
  196   LUT (x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96)
  197   LUT (x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA)
  198   LUT (xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E)
  199   LUT (x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96)
  200   LUT (x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA)
  201   LUT (x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5)
  202   LUT (x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97)
  203   LUT (x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47)
  204   LUT (xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B)
  205   LUT (x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9)
  206   LUT (x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA)
  207   LUT (x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1)
  208   LUT (x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47)
  209   LUT (x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E)
  210   LUT (xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94)
  211   LUT (x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9)
  212   LUT (x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6)
  213 
        // Fold the four result words into the caller's accumulators.
  214   *out1 ^= x1;
  215   *out2 ^= x2;
  216   *out3 ^= x3;
  217   *out4 ^= x4;
  218 }
  219 
  220 DECLSPEC void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
  221 {
        // DES S-box 6 in bitslice form, written as 24 LUT gates (one lop3.b32 each).
        // a1..a6 are the six input words, out1..out4 the four output words. Every gate
        // is bitwise, so each bit position of the u32 words is processed independently.
        // The results are XORed into *out1..*out4 rather than stored.
        // Temporary names look like the 64-bit truth table of each intermediate
        // (presumably over a1..a6 - not verified here); x1..x4 are the final outputs.
  222   LUT (x5050F5F55050F5F5, a1, a3, a5, 0xB2)
  223   LUT (x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66)
        // a1 is passed twice to the next gate, so it depends on only two distinct inputs.
  224   LUT (xAAAA5555AAAA5555, a1, a1, a5, 0xA9)
  225   LUT (x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9)
  226   LUT (x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6)
  227   LUT (xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD)
  228   LUT (x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4)
  229   LUT (x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C)
  230   LUT (x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20)
  231   LUT (xF00F0FF0F00F0FF0, a3, a4, a5, 0x69)
  232   LUT (x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E)
  233   LUT (x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49)
  234   LUT (xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93)
  235   LUT (x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C)
  236   LUT (x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4)
  237   LUT (x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76)
  238   LUT (xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD)
  239   LUT (x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86)
  240   LUT (x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6)
        // The finished output x4 is reused here as an ordinary gate input on the way to x2.
  241   LUT (x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D)
  242   LUT (x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26)
  243   LUT (xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B)
  244   LUT (x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2)
  245   LUT (x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA)
  246 
        // Fold the four result words into the caller's accumulators.
  247   *out1 ^= x1;
  248   *out2 ^= x2;
  249   *out3 ^= x3;
  250   *out4 ^= x4;
  251 }
  252 
  253 DECLSPEC void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
  254 {
        // DES S-box 7 in bitslice form, written as 24 LUT gates (one lop3.b32 each).
        // a1..a6 are the six input words, out1..out4 the four output words. Every gate
        // is bitwise, so each bit position of the u32 words is processed independently.
        // The results are XORed into *out1..*out4 rather than stored.
        // Temporary names look like the 64-bit truth table of each intermediate
        // (presumably over a1..a6 - not verified here); x1..x4 are the final outputs.
        // In this S-box a6 is used only by the four gates that produce x1..x4.
  255   LUT (x88AA88AA88AA88AA, a1, a2, a4, 0x0B)
  256   LUT (xAAAAFF00AAAAFF00, a1, a4, a5, 0x27)
  257   LUT (xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E)
  258   LUT (x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6)
  259   LUT (x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B)
  260   LUT (x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9)
  261   LUT (x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A)
  262   LUT (x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63)
  263   LUT (xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7)
  264   LUT (x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93)
  265   LUT (x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D)
  266   LUT (x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E)
  267   LUT (x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6)
  268   LUT (xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D)
  269   LUT (x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6)
  270   LUT (x8888CCCC8888CCCC, a1, a2, a5, 0x23)
  271   LUT (x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72)
  272   LUT (x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC)
  273   LUT (xA050A050A050A050, a1, a3, a4, 0x21)
  274   LUT (xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4)
  275   LUT (xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96)
  276   LUT (xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E)
  277   LUT (x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B)
  278   LUT (x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6)
  279 
        // Fold the four result words into the caller's accumulators.
  280   *out1 ^= x1;
  281   *out2 ^= x2;
  282   *out3 ^= x3;
  283   *out4 ^= x4;
  284 }
  285 
  286 DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
  287 {
        // DES S-box 8 in bitslice form, written as 23 LUT gates (one lop3.b32 each).
        // a1..a6 are the six input words, out1..out4 the four output words. Every gate
        // is bitwise, so each bit position of the u32 words is processed independently.
        // The results are XORed into *out1..*out4 rather than stored.
        // Temporary names look like the 64-bit truth table of each intermediate
        // (presumably over a1..a6 - not verified here); x1..x4 are the final outputs.
        // In this S-box a6 is used only by the four gates that produce x1..x4.
  288   LUT (xEEEE3333EEEE3333, a1, a2, a5, 0x9D)
        // a1 is passed twice to the next gate, so it depends on only two distinct inputs.
  289   LUT (xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83)
  290   LUT (xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B)
  291   LUT (x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85)
  292   LUT (xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6)
  293   LUT (x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9)
  294   LUT (x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E)
  295   LUT (x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61)
  296   LUT (x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C)
  297   LUT (xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF)
  298   LUT (x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4)
  299   LUT (x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69)
  300   LUT (xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F)
  301   LUT (xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9)
  302   LUT (x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6)
  303   LUT (xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C)
  304   LUT (x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71)
  305   LUT (x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9)
  306   LUT (x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69)
  307   LUT (x3, a6, x3055574530555745, x693CD926693CD926, 0x6A)
  308   LUT (x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2)
  309   LUT (x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C)
  310   LUT (x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39)
  311 
        // Fold the four result words into the caller's accumulators.
  312   *out1 ^= x1;
  313   *out2 ^= x2;
  314   *out3 ^= x3;
  315   *out4 ^= x4;
  316 }
  317 
  318 #else
  319 
  320 /*
  321  * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
  322  * architectures.  These use AND, OR, XOR, NOT, and AND-NOT gates.
  323  *
  324  * Gate counts: 49 44 46 33 48 46 46 41
  325  * Average: 44.125
  326  *
  327  * Several same-gate-count expressions for each S-box are included (for use on
  328  * different CPUs/GPUs).
  329  *
  330  * These Boolean expressions corresponding to DES S-boxes have been generated
  331  * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
  332  * John the Ripper password cracker: http://www.openwall.com/john/
  333  * Being mathematical formulas, they are not copyrighted and are free for reuse
  334  * by anyone.
  335  *
  336  * This file (a specific representation of the S-box expressions, surrounding
  337  * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
  338  * Redistribution and use in source and binary forms, with or without
  339  * modification, are permitted.  (This is a heavily cut-down "BSD license".)
  340  *
  341  * The effort has been sponsored by Rapid7: http://www.rapid7.com
  342  */
  343 
  344 DECLSPEC void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
  345 {
          /*
           * DES S-box 1 in bitslice form, using only &, |, ^ and ~ on u32 words.
           * a1..a6 are the six input words; every operation is bitwise, so each bit
           * position is processed independently. The four results are XORed into
           * *out1..*out4, i.e. combined with what the caller left there.
           *
           * Temporaries are named after the value they take for fixed input patterns:
           * x55005500 = a1 & ~a5 is consistent with a1 = 0x55555555, a5 = 0x00FF00FF
           * (naming scheme inferred from the code, not documented in this file).
           * xN0 / xN1 are the last two gates of output N+1. a2 appears only in the
           * xN0 gates, so it is folded in at the very end of each output.
           */
  346     u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969,
  347         x25202160;
  348     u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93;
  349     u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69;
  350     u32 x0A0A0000, x0AD80096, x00999900, x0AD99996;
  351     u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC;
  352     u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0;
  353     u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A;
  354     u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205;
  355     u32 x00, x01, x10, x11, x20, x21, x30, x31;
  356 
          /* Shared sub-expressions; several are reused by more than one output below. */
  357     x55005500 = a1 & ~a5;
  358     x5A0F5A0F = a4 ^ x55005500;
  359     x3333FFFF = a3 | a6;
  360     x66666666 = a1 ^ a3;
  361     x22226666 = x3333FFFF & x66666666;
  362     x2D2D6969 = a4 ^ x22226666;
  363     x25202160 = x2D2D6969 & ~x5A0F5A0F;
  364 
  365     x00FFFF00 = a5 ^ a6;
  366     x33CCCC33 = a3 ^ x00FFFF00;
  367     x4803120C = x5A0F5A0F & ~x33CCCC33;
  368     x2222FFFF = a6 | x22226666;
  369     x6A21EDF3 = x4803120C ^ x2222FFFF;
  370     x4A01CC93 = x6A21EDF3 & ~x25202160;
  371 
  372     x5555FFFF = a1 | a6;
  373     x7F75FFFF = x6A21EDF3 | x5555FFFF;
  374     x00D20096 = a5 & ~x2D2D6969;
  375     x7FA7FF69 = x7F75FFFF ^ x00D20096;
  376 
  377     x0A0A0000 = a4 & ~x5555FFFF;
  378     x0AD80096 = x00D20096 ^ x0A0A0000;
  379     x00999900 = x00FFFF00 & ~x66666666;
  380     x0AD99996 = x0AD80096 | x00999900;
  381 
          /* Output 3. */
  382     x22332233 = a3 & ~x55005500;
  383     x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
  384     x054885C0 = x257AA5F0 & ~x22332233;
  385     xFAB77A3F = ~x054885C0;
  386     x2221EDF3 = x3333FFFF & x6A21EDF3;
  387     xD89697CC = xFAB77A3F ^ x2221EDF3;
  388     x20 = x7FA7FF69 & ~a2;
  389     x21 = x20 ^ xD89697CC;
  390     *out3 ^= x21;
  391 
          /* Output 1. */
  392     x05B77AC0 = x00FFFF00 ^ x054885C0;
  393     x05F77AD6 = x00D20096 | x05B77AC0;
  394     x36C48529 = x3333FFFF ^ x05F77AD6;
  395     x6391D07C = a1 ^ x36C48529;
  396     xBB0747B0 = xD89697CC ^ x6391D07C;
  397     x00 = x25202160 | a2;
  398     x01 = x00 ^ xBB0747B0;
  399     *out1 ^= x01;
  400 
          /* Output 2. */
  401     x4C460000 = x3333FFFF ^ x7F75FFFF;
  402     x4EDF9996 = x0AD99996 | x4C460000;
  403     x2D4E49EA = x6391D07C ^ x4EDF9996;
  404     xBBFFFFB0 = x00FFFF00 | xBB0747B0;
  405     x96B1B65A = x2D4E49EA ^ xBBFFFFB0;
  406     x10 = x4A01CC93 | a2;
  407     x11 = x10 ^ x96B1B65A;
  408     *out2 ^= x11;
  409 
          /* Output 4. */
  410     x5AFF5AFF = a5 | x5A0F5A0F;
  411     x52B11215 = x5AFF5AFF & ~x2D4E49EA;
  412     x4201C010 = x4A01CC93 & x6391D07C;
  413     x10B0D205 = x52B11215 ^ x4201C010;
  414     x30 = x10B0D205 | a2;
  415     x31 = x30 ^ x0AD99996;
  416     *out4 ^= x31;
  417 }
  418 
  419 DECLSPEC void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
  420 {
          /*
           * DES S-box 2 in bitslice form, using only &, |, ^ and ~ on u32 words.
           * a1..a6 are the six input words; every operation is bitwise, so each bit
           * position is processed independently. The four results are XORed into
           * *out1..*out4, i.e. combined with what the caller left there.
           *
           * Temporaries are named after the value they take for fixed input patterns:
           * x55550000 = a1 & ~a6 is consistent with a1 = 0x55555555, a6 = 0x0000FFFF
           * (naming scheme inferred from the code, not documented in this file).
           * xN0 / xN1 are the last two gates of output N+1. a4 appears only in the
           * xN0 gates, so it is folded in at the very end of each output.
           */
  421     u32 x33CC33CC;
  422     u32 x55550000, x00AA00FF, x33BB33FF;
  423     u32 x33CC0000, x11441144, x11BB11BB, x003311BB;
  424     u32 x00000F0F, x336600FF, x332200FF, x332200F0;
  425     u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95;
  426     u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39;
  427     u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53;
  428     u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F;
  429     u32 x0A451047, xBBDFDD7B, xB19ACD3C;
  430     u32 x00, x01, x10, x11, x20, x21, x30, x31;
  431 
          /* Shared sub-expressions; several are reused by more than one output below. */
  432     x33CC33CC = a2 ^ a5;
  433 
  434     x55550000 = a1 & ~a6;
  435     x00AA00FF = a5 & ~x55550000;
  436     x33BB33FF = a2 | x00AA00FF;
  437 
  438     x33CC0000 = x33CC33CC & ~a6;
  439     x11441144 = a1 & x33CC33CC;
  440     x11BB11BB = a5 ^ x11441144;
  441     x003311BB = x11BB11BB & ~x33CC0000;
  442 
  443     x00000F0F = a3 & a6;
  444     x336600FF = x00AA00FF ^ x33CC0000;
  445     x332200FF = x33BB33FF & x336600FF;
  446     x332200F0 = x332200FF & ~x00000F0F;
  447 
          /* Output 2. */
  448     x0302000F = a3 & x332200FF;
  449     xAAAAAAAA = ~a1;
  450     xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
  451     x33CCCC33 = a6 ^ x33CC33CC;
  452     x33CCC030 = x33CCCC33 & ~x00000F0F;
  453     x9A646A95 = xA9A8AAA5 ^ x33CCC030;
  454     x10 = a4 & ~x332200F0;
  455     x11 = x10 ^ x9A646A95;
  456     *out2 ^= x11;
  457 
          /* Output 1. */
  458     x00333303 = a2 & ~x33CCC030;
  459     x118822B8 = x11BB11BB ^ x00333303;
  460     xA8208805 = xA9A8AAA5 & ~x118822B8;
  461     x3CC3C33C = a3 ^ x33CCCC33;
  462     x94E34B39 = xA8208805 ^ x3CC3C33C;
  463     x00 = x33BB33FF & ~a4;
  464     x01 = x00 ^ x94E34B39;
  465     *out1 ^= x01;
  466 
          /* Output 3. */
  467     x0331330C = x0302000F ^ x00333303;
  468     x3FF3F33C = x3CC3C33C | x0331330C;
  469     xA9DF596A = x33BB33FF ^ x9A646A95;
  470     xA9DF5F6F = x00000F0F | xA9DF596A;
  471     x962CAC53 = x3FF3F33C ^ xA9DF5F6F;
  472 
  473     xA9466A6A = x332200FF ^ x9A646A95;
  474     x3DA52153 = x94E34B39 ^ xA9466A6A;
  475     x29850143 = xA9DF5F6F & x3DA52153;
  476     x33C0330C = x33CC33CC & x3FF3F33C;
  477     x1A45324F = x29850143 ^ x33C0330C;
  478     x20 = x1A45324F | a4;
  479     x21 = x20 ^ x962CAC53;
  480     *out3 ^= x21;
  481 
          /* Output 4. */
  482     x0A451047 = x1A45324F & ~x118822B8;
  483     xBBDFDD7B = x33CCCC33 | xA9DF596A;
  484     xB19ACD3C = x0A451047 ^ xBBDFDD7B;
  485     x30 = x003311BB | a4;
  486     x31 = x30 ^ xB19ACD3C;
  487     *out4 ^= x31;
  488 }
  489 
  490 DECLSPEC void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
  491 {
          /*
           * DES S-box 3 in bitslice form, using only &, |, ^ and ~ on u32 words.
           * a1..a6 are the six input words; every operation is bitwise, so each bit
           * position is processed independently. The four results are XORed into
           * *out1..*out4, i.e. combined with what the caller left there.
           *
           * Temporaries are named after the value they take for fixed input patterns:
           * x44444444 = a1 & ~a2 is consistent with a1 = 0x55555555, a2 = 0x33333333
           * (naming scheme inferred from the code, not documented in this file).
           * xN0 / xN1 are the last two gates of output N+1. a5 appears only in the
           * xN0 gates, so it is folded in at the very end of each output.
           */
  492     u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4;
  493     u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00;
  494     u32 x00005EF4, x00FF5EFF, x00555455, x3C699796;
  495     u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F;
  496     u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8;
  497     u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A;
  498     u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356;
  499     u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B;
  500     u32 x00, x01, x10, x11, x20, x21, x30, x31;
  501 
          /* Shared sub-expressions; several are reused by more than one output below. */
  502     x44444444 = a1 & ~a2;
  503     x0F0FF0F0 = a3 ^ a6;
  504     x4F4FF4F4 = x44444444 | x0F0FF0F0;
  505     x00FFFF00 = a4 ^ a6;
  506     x00AAAA00 = x00FFFF00 & ~a1;
  507     x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;
  508 
  509     x3C3CC3C3 = a2 ^ x0F0FF0F0;
  510     x3C3C0000 = x3C3CC3C3 & ~a6;
  511     x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
  512     x0C840A00 = x4FE55EF4 & ~x7373F4F4;
  513 
          /* Output 4. */
  514     x00005EF4 = a6 & x4FE55EF4;
  515     x00FF5EFF = a4 | x00005EF4;
  516     x00555455 = a1 & x00FF5EFF;
  517     x3C699796 = x3C3CC3C3 ^ x00555455;
  518     x30 = x4FE55EF4 & ~a5;
  519     x31 = x30 ^ x3C699796;
  520     *out4 ^= x31;
  521 
          /* Output 2. */
  522     x000FF000 = x0F0FF0F0 & x00FFFF00;
  523     x55AA55AA = a1 ^ a4;
  524     x26D9A15E = x7373F4F4 ^ x55AA55AA;
  525     x2FDFAF5F = a3 | x26D9A15E;
  526     x2FD00F5F = x2FDFAF5F & ~x000FF000;
  527 
  528     x55AAFFAA = x00AAAA00 | x55AA55AA;
  529     x28410014 = x3C699796 & ~x55AAFFAA;
  530     x000000FF = a4 & a6;
  531     x000000CC = x000000FF & ~a2;
  532     x284100D8 = x28410014 ^ x000000CC;
  533 
  534     x204100D0 = x7373F4F4 & x284100D8;
  535     x3C3CC3FF = x3C3CC3C3 | x000000FF;
  536     x1C3CC32F = x3C3CC3FF & ~x204100D0;
  537     x4969967A = a1 ^ x1C3CC32F;
  538     x10 = x2FD00F5F & a5;
  539     x11 = x10 ^ x4969967A;
  540     *out2 ^= x11;
  541 
          /* Output 1. */
  542     x4CC44CC4 = x4FE55EF4 & ~a2;
  543     x40C040C0 = x4CC44CC4 & ~a3;
  544     xC3C33C3C = ~x3C3CC3C3;
  545     x9669C396 = x55AAFFAA ^ xC3C33C3C;
  546     xD6A98356 = x40C040C0 ^ x9669C396;
  547     x00 = a5 & ~x0C840A00;
  548     x01 = x00 ^ xD6A98356;
  549     *out1 ^= x01;
  550 
          /* Output 3. */
  551     xD6E9C3D6 = x40C040C0 | x9669C396;
  552     x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
  553     x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
  554     x001A000B = a4 & ~x4FE55EF4;
  555     x9A1F2D1B = x9A072D12 | x001A000B;
  556     x20 = a5 & ~x284100D8;
  557     x21 = x20 ^ x9A1F2D1B;
  558     *out3 ^= x21;
  559 }
  560 
  561 DECLSPEC void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
  562 {
          /*
           * DES S-box 4 in bitslice form, using only &, |, ^ and ~ on u32 words.
           * a1..a6 are the six input words; every operation is bitwise, so each bit
           * position is processed independently. The four results are XORed into
           * *out1..*out4, i.e. combined with what the caller left there.
           *
           * Temporaries are named after the value they take for fixed input patterns:
           * x5A5A5A5A = a1 ^ a3 is consistent with a1 = 0x55555555, a3 = 0x0F0F0F0F
           * (naming scheme inferred from the code, not documented in this file).
           * xN0 / xN1 are the last two gates of output N+1. a6 appears only in the
           * xN0 gates, so it is folded in at the very end of each output.
           */
  563     u32 x5A5A5A5A, x0F0FF0F0;
  564     u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F,
  565         x52FBCA0F, x61C8F93C;
  566     u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6;
  567     u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1;
  568     u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6;
  569     u32 x00, x01, x10, x11, x20, x21, x30, x31;
  570 
          /* Shared sub-expressions; several are reused by more than one output below. */
  571     x5A5A5A5A = a1 ^ a3;
  572     x0F0FF0F0 = a3 ^ a5;
  573     x33FF33FF = a2 | a4;
  574     x33FFCC00 = a5 ^ x33FF33FF;
  575     x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
  576     x0C0CC0C0 = x0F0FF0F0 & ~a2;
  577     x0CF3C03F = a4 ^ x0C0CC0C0;
  578     x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
  579     x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
  580     x61C8F93C = a2 ^ x52FBCA0F;
  581 
  582     x00C0C03C = x0CF3C03F & x61C8F93C;
  583     x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
  584     x3B92A366 = x5A5A5A5A ^ x61C8F93C;
  585     x30908326 = x3B92A366 & ~x0F0F30C0;
  586     x3C90B3D6 = x0C0030F0 ^ x30908326;
  587 
          /* Output 1. */
  588     x33CC33CC = a2 ^ a4;
  589     x0C0CFFFF = a5 | x0C0CC0C0;
  590     x379E5C99 = x3B92A366 ^ x0C0CFFFF;
  591     x04124C11 = x379E5C99 & ~x33CC33CC;
  592     x56E9861E = x52FBCA0F ^ x04124C11;
  593     x00 = a6 & ~x3C90B3D6;
  594     x01 = x00 ^ x56E9861E;
  595     *out1 ^= x01;
  596 
          /* Output 2: built from the complement of the output-1 intermediate. */
  597     xA91679E1 = ~x56E9861E;
  598     x10 = x3C90B3D6 & ~a6;
  599     x11 = x10 ^ xA91679E1;
  600     *out2 ^= x11;
  601 
          /* Output 3. */
  602     x9586CA37 = x3C90B3D6 ^ xA91679E1;
  603     x8402C833 = x9586CA37 & ~x33CC33CC;
  604     x84C2C83F = x00C0C03C | x8402C833;
  605     xB35C94A6 = x379E5C99 ^ x84C2C83F;
  606     x20 = x61C8F93C | a6;
  607     x21 = x20 ^ xB35C94A6;
  608     *out3 ^= x21;
  609 
          /* Output 4: same two operands as output 3, with AND in place of OR. */
  610     x30 = a6 & x61C8F93C;
  611     x31 = x30 ^ xB35C94A6;
  612     *out4 ^= x31;
  613 }
  614 
  615 DECLSPEC void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
  616 {
          /*
           * DES S-box 5 in bitslice form, using only &, |, ^ and ~ on u32 words.
           * a1..a6 are the six input words; every operation is bitwise, so each bit
           * position is processed independently. The four results are XORed into
           * *out1..*out4, i.e. combined with what the caller left there.
           *
           * Temporaries are named after the value they take for fixed input patterns:
           * x77777777 = a1 | a3 is consistent with a1 = 0x55555555, a3 = 0x33333333
           * (naming scheme inferred from the code, not documented in this file).
           * xN0 / xN1 are the last two gates of output N+1. a2 appears only in the
           * xN0 gates, so it is folded in at the very end of each output.
           */
  617     u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F;
  618     u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B;
  619     u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7;
  620     u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF;
  621     u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A;
  622     u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2;
  623     u32 x22222222, x16BCEE97, x0F080B04, x19B4E593;
  624     u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D;
  625     u32 x00, x01, x10, x11, x20, x21, x30, x31;
  626 
          /* Shared sub-expressions; several are reused by more than one output below. */
  627     x77777777 = a1 | a3;
  628     x77770000 = x77777777 & ~a6;
  629     x22225555 = a1 ^ x77770000;
  630     x11116666 = a3 ^ x22225555;
  631     x1F1F6F6F = a4 | x11116666;
  632 
  633     x70700000 = x77770000 & ~a4;
  634     x43433333 = a3 ^ x70700000;
  635     x00430033 = a5 & x43433333;
  636     x55557777 = a1 | x11116666;
  637     x55167744 = x00430033 ^ x55557777;
  638     x5A19784B = a4 ^ x55167744;
  639 
  640     x5A1987B4 = a6 ^ x5A19784B;
  641     x7A3BD7F5 = x22225555 | x5A1987B4;
  642     x003B00F5 = a5 & x7A3BD7F5;
  643     x221955A0 = x22225555 ^ x003B00F5;
  644     x05050707 = a4 & x55557777;
  645     x271C52A7 = x221955A0 ^ x05050707;
  646 
          /* Output 3. */
  647     x2A2A82A0 = x7A3BD7F5 & ~a1;
  648     x6969B193 = x43433333 ^ x2A2A82A0;
  649     x1FE06F90 = a5 ^ x1F1F6F6F;
  650     x16804E00 = x1FE06F90 & ~x6969B193;
  651     xE97FB1FF = ~x16804E00;
  652     x20 = xE97FB1FF & ~a2;
  653     x21 = x20 ^ x5A19784B;
  654     *out3 ^= x21;
  655 
          /* Output 4. */
  656     x43403302 = x43433333 & ~x003B00F5;
  657     x35CAED30 = x2A2A82A0 ^ x1FE06F90;
  658     x37DEFFB7 = x271C52A7 | x35CAED30;
  659     x349ECCB5 = x37DEFFB7 & ~x43403302;
  660     x0B01234A = x1F1F6F6F & ~x349ECCB5;
  661 
  662     x101884B4 = x5A1987B4 & x349ECCB5;
  663     x0FF8EB24 = x1FE06F90 ^ x101884B4;
  664     x41413333 = x43433333 & x55557777;
  665     x4FF9FB37 = x0FF8EB24 | x41413333;
  666     x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;
  667     x30 = x4FC2FBC2 & a2;
  668     x31 = x30 ^ x271C52A7;
  669     *out4 ^= x31;
  670 
          /* Output 1. */
  671     x22222222 = a1 ^ x77777777;
  672     x16BCEE97 = x349ECCB5 ^ x22222222;
  673     x0F080B04 = a4 & x0FF8EB24;
  674     x19B4E593 = x16BCEE97 ^ x0F080B04;
  675     x00 = x0B01234A | a2;
  676     x01 = x00 ^ x19B4E593;
  677     *out1 ^= x01;
  678 
          /* Output 2. */
  679     x5C5C5C5C = x1F1F6F6F ^ x43433333;
  680     x4448184C = x5C5C5C5C & ~x19B4E593;
  681     x2DDABE71 = x22225555 ^ x0FF8EB24;
  682     x6992A63D = x4448184C ^ x2DDABE71;
  683     x10 = x1F1F6F6F & a2;
  684     x11 = x10 ^ x6992A63D;
  685     *out2 ^= x11;
  686 }
  687 
  /*
   * s6: S-box 6 of the bitslice DES implementation, built only from bitwise
   * AND / OR / XOR / NOT operations on 32-bit words.
   *
   * a1..a6    : the six input words. Every operation below is bitwise, so each
   *             bit position is evaluated independently of the others.
   * out1..out4: the four result words are XORed into *out1..*out4; the
   *             previous contents are combined with the result, not replaced.
   *
   * Naming: the temporaries follow the convention of being named after the
   * value their expression takes for a1 = 0x55555555, a2 = 0x33333333,
   * a3 = 0x0F0F0F0F, a5 = 0x00FF00FF, a6 = 0x0000FFFF
   * (e.g. x33CC33CC = a2 ^ a5). a4 is used only in the four x?0 steps.
   *
   * NOTE(review): same statements as the s6 in the IS_AMD/IS_HIP/IS_GENERIC
   * section further down. This copy precedes the two #endif lines that close
   * the NVIDIA-specific section -- presumably its non-LOP3 branch; confirm
   * against the guard.
   */
  688 DECLSPEC void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
  689 {
  690     u32 x33CC33CC;
  691     u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099;
  692     u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6;
  693     u32 x09030C06, x09030000, x336622FF, x3A6522FF;
  694     u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD;
  695     u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B;
  696     u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479;
  697     u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5;
  698     u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67;
  699     u32 x00, x01, x10, x11, x20, x21, x30, x31;
  700
  701     x33CC33CC = a2 ^ a5;
  702
  703     x3333FFFF = a2 | a6;
  704     x11115555 = a1 & x3333FFFF;
  705     x22DD6699 = x33CC33CC ^ x11115555;
  706     x22DD9966 = a6 ^ x22DD6699;
  707     x00220099 = a5 & ~x22DD9966;
  708
  709     x00551144 = a1 & x22DD9966;
  710     x33662277 = a2 ^ x00551144;
  711     x5A5A5A5A = a1 ^ a3;
  712     x7B7E7A7F = x33662277 | x5A5A5A5A;
  713     x59A31CE6 = x22DD6699 ^ x7B7E7A7F;
  714
  715     x09030C06 = a3 & x59A31CE6;
  716     x09030000 = x09030C06 & ~a6;
  717     x336622FF = x00220099 | x33662277;
  718     x3A6522FF = x09030000 ^ x336622FF;
          // out4: a4 is mixed in here; the result is XORed into *out4
  719     x30 = x3A6522FF & a4;
  720     x31 = x30 ^ x59A31CE6;
  721     *out4 ^= x31;
  722
  723     x484D494C = a2 ^ x7B7E7A7F;
  724     x0000B6B3 = a6 & ~x484D494C;
  725     x0F0FB9BC = a3 ^ x0000B6B3;
  726     x00FC00F9 = a5 & ~x09030C06;
  727     x0FFFB9FD = x0F0FB9BC | x00FC00F9;
  728
  729     x5DF75DF7 = a1 | x59A31CE6;
  730     x116600F7 = x336622FF & x5DF75DF7;
  731     x1E69B94B = x0F0FB9BC ^ x116600F7;
  732     x1668B94B = x1E69B94B & ~x09030000;
          // out3: a4 is mixed in here; the result is XORed into *out3
  733     x20 = x00220099 | a4;
  734     x21 = x20 ^ x1668B94B;
  735     *out3 ^= x21;
  736
  737     x7B7B7B7B = a2 | x5A5A5A5A;
  738     x411E5984 = x3A6522FF ^ x7B7B7B7B;
  739     x1FFFFDFD = x11115555 | x0FFFB9FD;
  740     x5EE1A479 = x411E5984 ^ x1FFFFDFD;
  741
  742     x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
  743     x004B002D = a5 & ~x3CB4DFD2;
  744     xB7B2B6B3 = ~x484D494C;
  745     xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
  746     xCC82CDE5 = x004B002D ^ xCCC9CDC8;
          // out2: a4 is mixed in here; the result is XORed into *out2
  747     x10 = xCC82CDE5 & ~a4;
  748     x11 = x10 ^ x5EE1A479;
  749     *out2 ^= x11;
  750
  751     x0055EEBB = a6 ^ x00551144;
  752     x5A5AECE9 = a1 ^ x0F0FB9BC;
  753     x0050ECA9 = x0055EEBB & x5A5AECE9;
  754     xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
  755     xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;
          // out1: a4 is mixed in here; the result is XORed into *out1
  756     x00 = x0FFFB9FD & ~a4;
  757     x01 = x00 ^ xC59A2D67;
  758     *out1 ^= x01;
  759 }
  760 
  /*
   * s7: S-box 7 of the bitslice DES implementation, built only from bitwise
   * AND / OR / XOR / NOT operations on 32-bit words.
   *
   * a1..a6    : the six input words. Every operation below is bitwise, so each
   *             bit position is evaluated independently of the others.
   * out1..out4: the four result words are XORed into *out1..*out4; the
   *             previous contents are combined with the result, not replaced.
   *
   * Naming: the temporaries follow the convention of being named after the
   * value their expression takes for a2 = 0x55555555, a3 = 0x33333333,
   * a4 = 0x0F0F0F0F, a5 = 0x00FF00FF, a6 = 0x0000FFFF
   * (e.g. x0FF00FF0 = a4 ^ a5). a1 is used only in the four x?0 steps.
   *
   * NOTE(review): same statements as the s7 in the IS_AMD/IS_HIP/IS_GENERIC
   * section further down. This copy precedes the two #endif lines that close
   * the NVIDIA-specific section -- presumably its non-LOP3 branch; confirm
   * against the guard.
   */
  761 DECLSPEC void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
  762 {
  763     u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841;
  764     u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78;
  765     u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D;
  766     u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B;
  767     u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB;
  768     u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867;
  769     u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD;
  770     u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A;
  771     u32 x00, x01, x10, x11, x20, x21, x30, x31;
  772
  773     x0FF00FF0 = a4 ^ a5;
  774     x3CC33CC3 = a3 ^ x0FF00FF0;
  775     x00003CC3 = a6 & x3CC33CC3;
  776     x0F000F00 = a4 & x0FF00FF0;
  777     x5A555A55 = a2 ^ x0F000F00;
  778     x00001841 = x00003CC3 & x5A555A55;
  779
  780     x00000F00 = a6 & x0F000F00;
  781     x33333C33 = a3 ^ x00000F00;
  782     x7B777E77 = x5A555A55 | x33333C33;
  783     x0FF0F00F = a6 ^ x0FF00FF0;
  784     x74878E78 = x7B777E77 ^ x0FF0F00F;
          // out4: a1 is mixed in here; the result is XORed into *out4
  785     x30 = a1 & ~x00001841;
  786     x31 = x30 ^ x74878E78;
  787     *out4 ^= x31;
  788
  789     x003C003C = a5 & ~x3CC33CC3;
  790     x5A7D5A7D = x5A555A55 | x003C003C;
  791     x333300F0 = x00003CC3 ^ x33333C33;
  792     x694E5A8D = x5A7D5A7D ^ x333300F0;
  793
  794     x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
  795     x000F0303 = a4 & ~x0FF0CCCC;
  796     x5A505854 = x5A555A55 & ~x000F0303;
  797     x33CC000F = a5 ^ x333300F0;
  798     x699C585B = x5A505854 ^ x33CC000F;
  799
  800     x7F878F78 = x0F000F00 | x74878E78;
  801     x21101013 = a3 & x699C585B;
  802     x7F979F7B = x7F878F78 | x21101013;
  803     x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
  804     x4F9493BB = x7F979F7B ^ x30030CC0;
          // out1: a1 is mixed in here; the result is XORed into *out1
  805     x00 = x4F9493BB & ~a1;
  806     x01 = x00 ^ x694E5A8D;
  807     *out1 ^= x01;
  808
  809     x6F9CDBFB = x699C585B | x4F9493BB;
  810     x0000DBFB = a6 & x6F9CDBFB;
  811     x00005151 = a2 & x0000DBFB;
  812     x26DAC936 = x694E5A8D ^ x4F9493BB;
  813     x26DA9867 = x00005151 ^ x26DAC936;
  814
  815     x27DA9877 = x21101013 | x26DA9867;
  816     x27DA438C = x0000DBFB ^ x27DA9877;
  817     x2625C9C9 = a5 ^ x26DAC936;
  818     x27FFCBCD = x27DA438C | x2625C9C9;
          // out3: a1 is mixed in here; the result is XORed into *out3
  819     x20 = x27FFCBCD & a1;
  820     x21 = x20 ^ x699C585B;
  821     *out3 ^= x21;
  822
  823     x27FF1036 = x0000DBFB ^ x27FFCBCD;
  824     x27FF103E = x003C003C | x27FF1036;
  825     xB06B6C44 = ~x4F9493BB;
  826     x97947C7A = x27FF103E ^ xB06B6C44;
          // out2: a1 is mixed in here; the result is XORed into *out2
  827     x10 = x97947C7A & ~a1;
  828     x11 = x10 ^ x26DA9867;
  829     *out2 ^= x11;
  830 }
  831 
  /*
   * s8: S-box 8 of the bitslice DES implementation, built only from bitwise
   * AND / OR / XOR / NOT operations on 32-bit words.
   *
   * a1..a6    : the six input words. Every operation below is bitwise, so each
   *             bit position is evaluated independently of the others.
   * out1..out4: the four result words are XORed into *out1..*out4; the
   *             previous contents are combined with the result, not replaced.
   *
   * Naming: the temporaries follow the convention of being named after the
   * value their expression takes for a1 = 0x55555555, a2 = 0x33333333,
   * a3 = 0x0F0F0F0F, a4 = 0x00FF00FF, a5 = 0x0000FFFF
   * (e.g. x0C0C0C0C = a3 & ~a2). a6 is used only in the four x?0 steps.
   *
   * NOTE(review): same statements as the s8 in the IS_AMD/IS_HIP/IS_GENERIC
   * section further down. This copy precedes the two #endif lines that close
   * the NVIDIA-specific section -- presumably its non-LOP3 branch; confirm
   * against the guard.
   */
  832 DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
  833 {
  834     u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001;
  835     u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745;
  836     u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3;
  837     u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A;
  838     u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926;
  839     u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F;
  840     u32 xF700A600, x61008000, x03B7856B, x62B7056B;
  841     u32 x00, x01, x10, x11, x20, x21, x30, x31;
  842
  843     x0C0C0C0C = a3 & ~a2;
  844     x0000F0F0 = a5 & ~a3;
  845     x00FFF00F = a4 ^ x0000F0F0;
  846     x00555005 = a1 & x00FFF00F;
  847     x00515001 = x00555005 & ~x0C0C0C0C;
  848
  849     x33000330 = a2 & ~x00FFF00F;
  850     x77555775 = a1 | x33000330;
  851     x30303030 = a2 & ~a3;
  852     x3030CFCF = a5 ^ x30303030;
  853     x30104745 = x77555775 & x3030CFCF;
  854     x30555745 = x00555005 | x30104745;
  855
  856     xFF000FF0 = ~x00FFF00F;
  857     xCF1048B5 = x30104745 ^ xFF000FF0;
  858     x080A080A = a3 & ~x77555775;
  859     xC71A40BF = xCF1048B5 ^ x080A080A;
  860     xCB164CB3 = x0C0C0C0C ^ xC71A40BF;
          // out2: a6 is mixed in here; the result is XORed into *out2
  861     x10 = x00515001 | a6;
  862     x11 = x10 ^ xCB164CB3;
  863     *out2 ^= x11;
  864
  865     x9E4319E6 = a1 ^ xCB164CB3;
  866     x000019E6 = a5 & x9E4319E6;
  867     xF429738C = a2 ^ xC71A40BF;
  868     xF4296A6A = x000019E6 ^ xF429738C;
  869     xC729695A = x33000330 ^ xF4296A6A;
  870
  871     xC47C3D2F = x30555745 ^ xF4296A6A;
  872     xF77F3F3F = a2 | xC47C3D2F;
  873     x9E43E619 = a5 ^ x9E4319E6;
  874     x693CD926 = xF77F3F3F ^ x9E43E619;
          // out3: a6 is mixed in here; the result is XORed into *out3
  875     x20 = x30555745 & a6;
  876     x21 = x20 ^ x693CD926;
  877     *out3 ^= x21;
  878
  879     xF719A695 = x3030CFCF ^ xC729695A;
  880     xF4FF73FF = a4 | xF429738C;
  881     x03E6D56A = xF719A695 ^ xF4FF73FF;
  882     x56B3803F = a1 ^ x03E6D56A;
          // out4: a6 is mixed in here; the result is XORed into *out4
  883     x30 = x56B3803F & a6;
  884     x31 = x30 ^ xC729695A;
  885     *out4 ^= x31;
  886
  887     xF700A600 = xF719A695 & ~a4;
  888     x61008000 = x693CD926 & xF700A600;
  889     x03B7856B = x00515001 ^ x03E6D56A;
  890     x62B7056B = x61008000 ^ x03B7856B;
          // out1: a6 is mixed in here; the result is XORed into *out1
  891     x00 = x62B7056B | a6;
  892     x01 = x00 ^ xC729695A;
  893     *out1 ^= x01;
  894 }
  895 
  896 #endif
  897 #endif
  898 
  899 #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
  900 
  901 /*
  902  * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
  903  * architectures.  These use AND, OR, XOR, NOT, and AND-NOT gates.
  904  *
  905  * Gate counts: 49 44 46 33 48 46 46 41
  906  * Average: 44.125
  907  *
  908  * Several same-gate-count expressions for each S-box are included (for use on
  909  * different CPUs/GPUs).
  910  *
  911  * These Boolean expressions corresponding to DES S-boxes have been generated
  912  * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
  913  * John the Ripper password cracker: http://www.openwall.com/john/
  914  * Being mathematical formulas, they are not copyrighted and are free for reuse
  915  * by anyone.
  916  *
  917  * This file (a specific representation of the S-box expressions, surrounding
  918  * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
  919  * Redistribution and use in source and binary forms, with or without
  920  * modification, are permitted.  (This is a heavily cut-down "BSD license".)
  921  *
  922  * The effort has been sponsored by Rapid7: http://www.rapid7.com
  923  */
  924 
  /*
   * s1: S-box 1 of the bitslice DES S-box set described in the comment at the
   * start of this #if section, built only from bitwise AND / OR / XOR / NOT
   * operations on 32-bit words.
   *
   * a1..a6    : the six input words. Every operation below is bitwise, so each
   *             bit position is evaluated independently of the others.
   * out1..out4: the four result words are XORed into *out1..*out4; the
   *             previous contents are combined with the result, not replaced.
   *
   * Naming: the temporaries follow the convention of being named after the
   * value their expression takes for a1 = 0x55555555, a3 = 0x33333333,
   * a4 = 0x0F0F0F0F, a5 = 0x00FF00FF, a6 = 0x0000FFFF
   * (e.g. x66666666 = a1 ^ a3). a2 is used only in the four x?0 steps.
   */
  925 DECLSPEC void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
  926 {
  927     u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969,
  928         x25202160;
  929     u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93;
  930     u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69;
  931     u32 x0A0A0000, x0AD80096, x00999900, x0AD99996;
  932     u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC;
  933     u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0;
  934     u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A;
  935     u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205;
  936     u32 x00, x01, x10, x11, x20, x21, x30, x31;
  937
  938     x55005500 = a1 & ~a5;
  939     x5A0F5A0F = a4 ^ x55005500;
  940     x3333FFFF = a3 | a6;
  941     x66666666 = a1 ^ a3;
  942     x22226666 = x3333FFFF & x66666666;
  943     x2D2D6969 = a4 ^ x22226666;
  944     x25202160 = x2D2D6969 & ~x5A0F5A0F;
  945
  946     x00FFFF00 = a5 ^ a6;
  947     x33CCCC33 = a3 ^ x00FFFF00;
  948     x4803120C = x5A0F5A0F & ~x33CCCC33;
  949     x2222FFFF = a6 | x22226666;
  950     x6A21EDF3 = x4803120C ^ x2222FFFF;
  951     x4A01CC93 = x6A21EDF3 & ~x25202160;
  952
  953     x5555FFFF = a1 | a6;
  954     x7F75FFFF = x6A21EDF3 | x5555FFFF;
  955     x00D20096 = a5 & ~x2D2D6969;
  956     x7FA7FF69 = x7F75FFFF ^ x00D20096;
  957
  958     x0A0A0000 = a4 & ~x5555FFFF;
  959     x0AD80096 = x00D20096 ^ x0A0A0000;
  960     x00999900 = x00FFFF00 & ~x66666666;
  961     x0AD99996 = x0AD80096 | x00999900;
  962
  963     x22332233 = a3 & ~x55005500;
  964     x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
  965     x054885C0 = x257AA5F0 & ~x22332233;
  966     xFAB77A3F = ~x054885C0;
  967     x2221EDF3 = x3333FFFF & x6A21EDF3;
  968     xD89697CC = xFAB77A3F ^ x2221EDF3;
          // out3: a2 is mixed in here; the result is XORed into *out3
  969     x20 = x7FA7FF69 & ~a2;
  970     x21 = x20 ^ xD89697CC;
  971     *out3 ^= x21;
  972
  973     x05B77AC0 = x00FFFF00 ^ x054885C0;
  974     x05F77AD6 = x00D20096 | x05B77AC0;
  975     x36C48529 = x3333FFFF ^ x05F77AD6;
  976     x6391D07C = a1 ^ x36C48529;
  977     xBB0747B0 = xD89697CC ^ x6391D07C;
          // out1: a2 is mixed in here; the result is XORed into *out1
  978     x00 = x25202160 | a2;
  979     x01 = x00 ^ xBB0747B0;
  980     *out1 ^= x01;
  981
  982     x4C460000 = x3333FFFF ^ x7F75FFFF;
  983     x4EDF9996 = x0AD99996 | x4C460000;
  984     x2D4E49EA = x6391D07C ^ x4EDF9996;
  985     xBBFFFFB0 = x00FFFF00 | xBB0747B0;
  986     x96B1B65A = x2D4E49EA ^ xBBFFFFB0;
          // out2: a2 is mixed in here; the result is XORed into *out2
  987     x10 = x4A01CC93 | a2;
  988     x11 = x10 ^ x96B1B65A;
  989     *out2 ^= x11;
  990
  991     x5AFF5AFF = a5 | x5A0F5A0F;
  992     x52B11215 = x5AFF5AFF & ~x2D4E49EA;
  993     x4201C010 = x4A01CC93 & x6391D07C;
  994     x10B0D205 = x52B11215 ^ x4201C010;
          // out4: a2 is mixed in here; the result is XORed into *out4
  995     x30 = x10B0D205 | a2;
  996     x31 = x30 ^ x0AD99996;
  997     *out4 ^= x31;
  998 }
  999 
 /*
  * s2: S-box 2 of the bitslice DES S-box set described in the comment at the
  * start of this #if section, built only from bitwise AND / OR / XOR / NOT
  * operations on 32-bit words.
  *
  * a1..a6    : the six input words. Every operation below is bitwise, so each
  *             bit position is evaluated independently of the others.
  * out1..out4: the four result words are XORed into *out1..*out4; the
  *             previous contents are combined with the result, not replaced.
  *
  * Naming: the temporaries follow the convention of being named after the
  * value their expression takes for a1 = 0x55555555, a2 = 0x33333333,
  * a3 = 0x0F0F0F0F, a5 = 0x00FF00FF, a6 = 0x0000FFFF
  * (e.g. x33CC33CC = a2 ^ a5). a4 is used only in the four x?0 steps.
  */
 1000 DECLSPEC void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
 1001 {
 1002     u32 x33CC33CC;
 1003     u32 x55550000, x00AA00FF, x33BB33FF;
 1004     u32 x33CC0000, x11441144, x11BB11BB, x003311BB;
 1005     u32 x00000F0F, x336600FF, x332200FF, x332200F0;
 1006     u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95;
 1007     u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39;
 1008     u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53;
 1009     u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F;
 1010     u32 x0A451047, xBBDFDD7B, xB19ACD3C;
 1011     u32 x00, x01, x10, x11, x20, x21, x30, x31;
 1012
 1013     x33CC33CC = a2 ^ a5;
 1014
 1015     x55550000 = a1 & ~a6;
 1016     x00AA00FF = a5 & ~x55550000;
 1017     x33BB33FF = a2 | x00AA00FF;
 1018
 1019     x33CC0000 = x33CC33CC & ~a6;
 1020     x11441144 = a1 & x33CC33CC;
 1021     x11BB11BB = a5 ^ x11441144;
 1022     x003311BB = x11BB11BB & ~x33CC0000;
 1023
 1024     x00000F0F = a3 & a6;
 1025     x336600FF = x00AA00FF ^ x33CC0000;
 1026     x332200FF = x33BB33FF & x336600FF;
 1027     x332200F0 = x332200FF & ~x00000F0F;
 1028
 1029     x0302000F = a3 & x332200FF;
 1030     xAAAAAAAA = ~a1;
 1031     xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
 1032     x33CCCC33 = a6 ^ x33CC33CC;
 1033     x33CCC030 = x33CCCC33 & ~x00000F0F;
 1034     x9A646A95 = xA9A8AAA5 ^ x33CCC030;
          // out2: a4 is mixed in here; the result is XORed into *out2
 1035     x10 = a4 & ~x332200F0;
 1036     x11 = x10 ^ x9A646A95;
 1037     *out2 ^= x11;
 1038
 1039     x00333303 = a2 & ~x33CCC030;
 1040     x118822B8 = x11BB11BB ^ x00333303;
 1041     xA8208805 = xA9A8AAA5 & ~x118822B8;
 1042     x3CC3C33C = a3 ^ x33CCCC33;
 1043     x94E34B39 = xA8208805 ^ x3CC3C33C;
          // out1: a4 is mixed in here; the result is XORed into *out1
 1044     x00 = x33BB33FF & ~a4;
 1045     x01 = x00 ^ x94E34B39;
 1046     *out1 ^= x01;
 1047
 1048     x0331330C = x0302000F ^ x00333303;
 1049     x3FF3F33C = x3CC3C33C | x0331330C;
 1050     xA9DF596A = x33BB33FF ^ x9A646A95;
 1051     xA9DF5F6F = x00000F0F | xA9DF596A;
 1052     x962CAC53 = x3FF3F33C ^ xA9DF5F6F;
 1053
 1054     xA9466A6A = x332200FF ^ x9A646A95;
 1055     x3DA52153 = x94E34B39 ^ xA9466A6A;
 1056     x29850143 = xA9DF5F6F & x3DA52153;
 1057     x33C0330C = x33CC33CC & x3FF3F33C;
 1058     x1A45324F = x29850143 ^ x33C0330C;
          // out3: a4 is mixed in here; the result is XORed into *out3
 1059     x20 = x1A45324F | a4;
 1060     x21 = x20 ^ x962CAC53;
 1061     *out3 ^= x21;
 1062
 1063     x0A451047 = x1A45324F & ~x118822B8;
 1064     xBBDFDD7B = x33CCCC33 | xA9DF596A;
 1065     xB19ACD3C = x0A451047 ^ xBBDFDD7B;
          // out4: a4 is mixed in here; the result is XORed into *out4
 1066     x30 = x003311BB | a4;
 1067     x31 = x30 ^ xB19ACD3C;
 1068     *out4 ^= x31;
 1069 }
 1070 
 /*
  * s3: S-box 3 of the bitslice DES S-box set described in the comment at the
  * start of this #if section, built only from bitwise AND / OR / XOR / NOT
  * operations on 32-bit words.
  *
  * a1..a6    : the six input words. Every operation below is bitwise, so each
  *             bit position is evaluated independently of the others.
  * out1..out4: the four result words are XORed into *out1..*out4; the
  *             previous contents are combined with the result, not replaced.
  *
  * Naming: the temporaries follow the convention of being named after the
  * value their expression takes for a1 = 0x55555555, a2 = 0x33333333,
  * a3 = 0x0F0F0F0F, a4 = 0x00FF00FF, a6 = 0x0000FFFF
  * (e.g. x0F0FF0F0 = a3 ^ a6). a5 is used only in the four x?0 steps.
  */
 1071 DECLSPEC void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
 1072 {
 1073     u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4;
 1074     u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00;
 1075     u32 x00005EF4, x00FF5EFF, x00555455, x3C699796;
 1076     u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F;
 1077     u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8;
 1078     u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A;
 1079     u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356;
 1080     u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B;
 1081     u32 x00, x01, x10, x11, x20, x21, x30, x31;
 1082
 1083     x44444444 = a1 & ~a2;
 1084     x0F0FF0F0 = a3 ^ a6;
 1085     x4F4FF4F4 = x44444444 | x0F0FF0F0;
 1086     x00FFFF00 = a4 ^ a6;
 1087     x00AAAA00 = x00FFFF00 & ~a1;
 1088     x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;
 1089
 1090     x3C3CC3C3 = a2 ^ x0F0FF0F0;
 1091     x3C3C0000 = x3C3CC3C3 & ~a6;
 1092     x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
 1093     x0C840A00 = x4FE55EF4 & ~x7373F4F4;
 1094
 1095     x00005EF4 = a6 & x4FE55EF4;
 1096     x00FF5EFF = a4 | x00005EF4;
 1097     x00555455 = a1 & x00FF5EFF;
 1098     x3C699796 = x3C3CC3C3 ^ x00555455;
          // out4: a5 is mixed in here; the result is XORed into *out4
 1099     x30 = x4FE55EF4 & ~a5;
 1100     x31 = x30 ^ x3C699796;
 1101     *out4 ^= x31;
 1102
 1103     x000FF000 = x0F0FF0F0 & x00FFFF00;
 1104     x55AA55AA = a1 ^ a4;
 1105     x26D9A15E = x7373F4F4 ^ x55AA55AA;
 1106     x2FDFAF5F = a3 | x26D9A15E;
 1107     x2FD00F5F = x2FDFAF5F & ~x000FF000;
 1108
 1109     x55AAFFAA = x00AAAA00 | x55AA55AA;
 1110     x28410014 = x3C699796 & ~x55AAFFAA;
 1111     x000000FF = a4 & a6;
 1112     x000000CC = x000000FF & ~a2;
 1113     x284100D8 = x28410014 ^ x000000CC;
 1114
 1115     x204100D0 = x7373F4F4 & x284100D8;
 1116     x3C3CC3FF = x3C3CC3C3 | x000000FF;
 1117     x1C3CC32F = x3C3CC3FF & ~x204100D0;
 1118     x4969967A = a1 ^ x1C3CC32F;
          // out2: a5 is mixed in here; the result is XORed into *out2
 1119     x10 = x2FD00F5F & a5;
 1120     x11 = x10 ^ x4969967A;
 1121     *out2 ^= x11;
 1122
 1123     x4CC44CC4 = x4FE55EF4 & ~a2;
 1124     x40C040C0 = x4CC44CC4 & ~a3;
 1125     xC3C33C3C = ~x3C3CC3C3;
 1126     x9669C396 = x55AAFFAA ^ xC3C33C3C;
 1127     xD6A98356 = x40C040C0 ^ x9669C396;
          // out1: a5 is mixed in here; the result is XORed into *out1
 1128     x00 = a5 & ~x0C840A00;
 1129     x01 = x00 ^ xD6A98356;
 1130     *out1 ^= x01;
 1131
 1132     xD6E9C3D6 = x40C040C0 | x9669C396;
 1133     x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
 1134     x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
 1135     x001A000B = a4 & ~x4FE55EF4;
 1136     x9A1F2D1B = x9A072D12 | x001A000B;
          // out3: a5 is mixed in here; the result is XORed into *out3
 1137     x20 = a5 & ~x284100D8;
 1138     x21 = x20 ^ x9A1F2D1B;
 1139     *out3 ^= x21;
 1140 }
 1141 
 /*
  * s4: S-box 4 of the bitslice DES S-box set described in the comment at the
  * start of this #if section, built only from bitwise AND / OR / XOR / NOT
  * operations on 32-bit words.
  *
  * a1..a6    : the six input words. Every operation below is bitwise, so each
  *             bit position is evaluated independently of the others.
  * out1..out4: the four result words are XORed into *out1..*out4; the
  *             previous contents are combined with the result, not replaced.
  *
  * Naming: the temporaries follow the convention of being named after the
  * value their expression takes for a1 = 0x55555555, a2 = 0x33333333,
  * a3 = 0x0F0F0F0F, a4 = 0x00FF00FF, a5 = 0x0000FFFF
  * (e.g. x5A5A5A5A = a1 ^ a3). a6 is used only in the four x?0 steps.
  */
 1142 DECLSPEC void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
 1143 {
 1144     u32 x5A5A5A5A, x0F0FF0F0;
 1145     u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F,
 1146         x52FBCA0F, x61C8F93C;
 1147     u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6;
 1148     u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1;
 1149     u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6;
 1150     u32 x00, x01, x10, x11, x20, x21, x30, x31;
 1151
 1152     x5A5A5A5A = a1 ^ a3;
 1153     x0F0FF0F0 = a3 ^ a5;
 1154     x33FF33FF = a2 | a4;
 1155     x33FFCC00 = a5 ^ x33FF33FF;
 1156     x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
 1157     x0C0CC0C0 = x0F0FF0F0 & ~a2;
 1158     x0CF3C03F = a4 ^ x0C0CC0C0;
 1159     x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
 1160     x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
 1161     x61C8F93C = a2 ^ x52FBCA0F;
 1162
 1163     x00C0C03C = x0CF3C03F & x61C8F93C;
 1164     x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
 1165     x3B92A366 = x5A5A5A5A ^ x61C8F93C;
 1166     x30908326 = x3B92A366 & ~x0F0F30C0;
 1167     x3C90B3D6 = x0C0030F0 ^ x30908326;
 1168
 1169     x33CC33CC = a2 ^ a4;
 1170     x0C0CFFFF = a5 | x0C0CC0C0;
 1171     x379E5C99 = x3B92A366 ^ x0C0CFFFF;
 1172     x04124C11 = x379E5C99 & ~x33CC33CC;
 1173     x56E9861E = x52FBCA0F ^ x04124C11;
          // out1: a6 is mixed in here; the result is XORed into *out1
 1174     x00 = a6 & ~x3C90B3D6;
 1175     x01 = x00 ^ x56E9861E;
 1176     *out1 ^= x01;
 1177
 1178     xA91679E1 = ~x56E9861E;
          // out2: a6 is mixed in here; the result is XORed into *out2
 1179     x10 = x3C90B3D6 & ~a6;
 1180     x11 = x10 ^ xA91679E1;
 1181     *out2 ^= x11;
 1182
 1183     x9586CA37 = x3C90B3D6 ^ xA91679E1;
 1184     x8402C833 = x9586CA37 & ~x33CC33CC;
 1185     x84C2C83F = x00C0C03C | x8402C833;
 1186     xB35C94A6 = x379E5C99 ^ x84C2C83F;
          // out3: a6 is mixed in here; the result is XORed into *out3
 1187     x20 = x61C8F93C | a6;
 1188     x21 = x20 ^ xB35C94A6;
 1189     *out3 ^= x21;
 1190
          // out4: a6 is mixed in here; reuses xB35C94A6 from the out3 step
 1191     x30 = a6 & x61C8F93C;
 1192     x31 = x30 ^ xB35C94A6;
 1193     *out4 ^= x31;
 1194 }
 1195 
 /*
  * s5: S-box 5 of the bitslice DES S-box set described in the comment at the
  * start of this #if section, built only from bitwise AND / OR / XOR / NOT
  * operations on 32-bit words.
  *
  * a1..a6    : the six input words. Every operation below is bitwise, so each
  *             bit position is evaluated independently of the others.
  * out1..out4: the four result words are XORed into *out1..*out4; the
  *             previous contents are combined with the result, not replaced.
  *
  * Naming: the temporaries follow the convention of being named after the
  * value their expression takes for a1 = 0x55555555, a3 = 0x33333333,
  * a4 = 0x0F0F0F0F, a5 = 0x00FF00FF, a6 = 0x0000FFFF
  * (e.g. x77777777 = a1 | a3). a2 is used only in the four x?0 steps.
  */
 1196 DECLSPEC void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
 1197 {
 1198     u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F;
 1199     u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B;
 1200     u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7;
 1201     u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF;
 1202     u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A;
 1203     u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2;
 1204     u32 x22222222, x16BCEE97, x0F080B04, x19B4E593;
 1205     u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D;
 1206     u32 x00, x01, x10, x11, x20, x21, x30, x31;
 1207
 1208     x77777777 = a1 | a3;
 1209     x77770000 = x77777777 & ~a6;
 1210     x22225555 = a1 ^ x77770000;
 1211     x11116666 = a3 ^ x22225555;
 1212     x1F1F6F6F = a4 | x11116666;
 1213
 1214     x70700000 = x77770000 & ~a4;
 1215     x43433333 = a3 ^ x70700000;
 1216     x00430033 = a5 & x43433333;
 1217     x55557777 = a1 | x11116666;
 1218     x55167744 = x00430033 ^ x55557777;
 1219     x5A19784B = a4 ^ x55167744;
 1220
 1221     x5A1987B4 = a6 ^ x5A19784B;
 1222     x7A3BD7F5 = x22225555 | x5A1987B4;
 1223     x003B00F5 = a5 & x7A3BD7F5;
 1224     x221955A0 = x22225555 ^ x003B00F5;
 1225     x05050707 = a4 & x55557777;
 1226     x271C52A7 = x221955A0 ^ x05050707;
 1227
 1228     x2A2A82A0 = x7A3BD7F5 & ~a1;
 1229     x6969B193 = x43433333 ^ x2A2A82A0;
 1230     x1FE06F90 = a5 ^ x1F1F6F6F;
 1231     x16804E00 = x1FE06F90 & ~x6969B193;
 1232     xE97FB1FF = ~x16804E00;
          // out3: a2 is mixed in here; the result is XORed into *out3
 1233     x20 = xE97FB1FF & ~a2;
 1234     x21 = x20 ^ x5A19784B;
 1235     *out3 ^= x21;
 1236
 1237     x43403302 = x43433333 & ~x003B00F5;
 1238     x35CAED30 = x2A2A82A0 ^ x1FE06F90;
 1239     x37DEFFB7 = x271C52A7 | x35CAED30;
 1240     x349ECCB5 = x37DEFFB7 & ~x43403302;
 1241     x0B01234A = x1F1F6F6F & ~x349ECCB5;
 1242
 1243     x101884B4 = x5A1987B4 & x349ECCB5;
 1244     x0FF8EB24 = x1FE06F90 ^ x101884B4;
 1245     x41413333 = x43433333 & x55557777;
 1246     x4FF9FB37 = x0FF8EB24 | x41413333;
 1247     x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;
          // out4: a2 is mixed in here; the result is XORed into *out4
 1248     x30 = x4FC2FBC2 & a2;
 1249     x31 = x30 ^ x271C52A7;
 1250     *out4 ^= x31;
 1251
 1252     x22222222 = a1 ^ x77777777;
 1253     x16BCEE97 = x349ECCB5 ^ x22222222;
 1254     x0F080B04 = a4 & x0FF8EB24;
 1255     x19B4E593 = x16BCEE97 ^ x0F080B04;
          // out1: a2 is mixed in here; the result is XORed into *out1
 1256     x00 = x0B01234A | a2;
 1257     x01 = x00 ^ x19B4E593;
 1258     *out1 ^= x01;
 1259
 1260     x5C5C5C5C = x1F1F6F6F ^ x43433333;
 1261     x4448184C = x5C5C5C5C & ~x19B4E593;
 1262     x2DDABE71 = x22225555 ^ x0FF8EB24;
 1263     x6992A63D = x4448184C ^ x2DDABE71;
          // out2: a2 is mixed in here; the result is XORed into *out2
 1264     x10 = x1F1F6F6F & a2;
 1265     x11 = x10 ^ x6992A63D;
 1266     *out2 ^= x11;
 1267 }
 1268 
 /*
  * s6: S-box 6 of the bitslice DES S-box set described in the comment at the
  * start of this #if section, built only from bitwise AND / OR / XOR / NOT
  * operations on 32-bit words.
  *
  * a1..a6    : the six input words. Every operation below is bitwise, so each
  *             bit position is evaluated independently of the others.
  * out1..out4: the four result words are XORed into *out1..*out4; the
  *             previous contents are combined with the result, not replaced.
  *
  * Naming: the temporaries follow the convention of being named after the
  * value their expression takes for a1 = 0x55555555, a2 = 0x33333333,
  * a3 = 0x0F0F0F0F, a5 = 0x00FF00FF, a6 = 0x0000FFFF
  * (e.g. x33CC33CC = a2 ^ a5). a4 is used only in the four x?0 steps.
  */
 1269 DECLSPEC void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
 1270 {
 1271     u32 x33CC33CC;
 1272     u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099;
 1273     u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6;
 1274     u32 x09030C06, x09030000, x336622FF, x3A6522FF;
 1275     u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD;
 1276     u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B;
 1277     u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479;
 1278     u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5;
 1279     u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67;
 1280     u32 x00, x01, x10, x11, x20, x21, x30, x31;
 1281
 1282     x33CC33CC = a2 ^ a5;
 1283
 1284     x3333FFFF = a2 | a6;
 1285     x11115555 = a1 & x3333FFFF;
 1286     x22DD6699 = x33CC33CC ^ x11115555;
 1287     x22DD9966 = a6 ^ x22DD6699;
 1288     x00220099 = a5 & ~x22DD9966;
 1289
 1290     x00551144 = a1 & x22DD9966;
 1291     x33662277 = a2 ^ x00551144;
 1292     x5A5A5A5A = a1 ^ a3;
 1293     x7B7E7A7F = x33662277 | x5A5A5A5A;
 1294     x59A31CE6 = x22DD6699 ^ x7B7E7A7F;
 1295
 1296     x09030C06 = a3 & x59A31CE6;
 1297     x09030000 = x09030C06 & ~a6;
 1298     x336622FF = x00220099 | x33662277;
 1299     x3A6522FF = x09030000 ^ x336622FF;
          // out4: a4 is mixed in here; the result is XORed into *out4
 1300     x30 = x3A6522FF & a4;
 1301     x31 = x30 ^ x59A31CE6;
 1302     *out4 ^= x31;
 1303
 1304     x484D494C = a2 ^ x7B7E7A7F;
 1305     x0000B6B3 = a6 & ~x484D494C;
 1306     x0F0FB9BC = a3 ^ x0000B6B3;
 1307     x00FC00F9 = a5 & ~x09030C06;
 1308     x0FFFB9FD = x0F0FB9BC | x00FC00F9;
 1309
 1310     x5DF75DF7 = a1 | x59A31CE6;
 1311     x116600F7 = x336622FF & x5DF75DF7;
 1312     x1E69B94B = x0F0FB9BC ^ x116600F7;
 1313     x1668B94B = x1E69B94B & ~x09030000;
          // out3: a4 is mixed in here; the result is XORed into *out3
 1314     x20 = x00220099 | a4;
 1315     x21 = x20 ^ x1668B94B;
 1316     *out3 ^= x21;
 1317
 1318     x7B7B7B7B = a2 | x5A5A5A5A;
 1319     x411E5984 = x3A6522FF ^ x7B7B7B7B;
 1320     x1FFFFDFD = x11115555 | x0FFFB9FD;
 1321     x5EE1A479 = x411E5984 ^ x1FFFFDFD;
 1322
 1323     x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
 1324     x004B002D = a5 & ~x3CB4DFD2;
 1325     xB7B2B6B3 = ~x484D494C;
 1326     xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
 1327     xCC82CDE5 = x004B002D ^ xCCC9CDC8;
          // out2: a4 is mixed in here; the result is XORed into *out2
 1328     x10 = xCC82CDE5 & ~a4;
 1329     x11 = x10 ^ x5EE1A479;
 1330     *out2 ^= x11;
 1331
 1332     x0055EEBB = a6 ^ x00551144;
 1333     x5A5AECE9 = a1 ^ x0F0FB9BC;
 1334     x0050ECA9 = x0055EEBB & x5A5AECE9;
 1335     xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
 1336     xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;
          // out1: a4 is mixed in here; the result is XORed into *out1
 1337     x00 = x0FFFB9FD & ~a4;
 1338     x01 = x00 ^ xC59A2D67;
 1339     *out1 ^= x01;
 1340 }
 1341 
 /*
  * s7: S-box 7 of the bitslice DES S-box set described in the comment at the
  * start of this #if section, built only from bitwise AND / OR / XOR / NOT
  * operations on 32-bit words.
  *
  * a1..a6    : the six input words. Every operation below is bitwise, so each
  *             bit position is evaluated independently of the others.
  * out1..out4: the four result words are XORed into *out1..*out4; the
  *             previous contents are combined with the result, not replaced.
  *
  * Naming: the temporaries follow the convention of being named after the
  * value their expression takes for a2 = 0x55555555, a3 = 0x33333333,
  * a4 = 0x0F0F0F0F, a5 = 0x00FF00FF, a6 = 0x0000FFFF
  * (e.g. x0FF00FF0 = a4 ^ a5). a1 is used only in the four x?0 steps.
  */
 1342 DECLSPEC void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
 1343 {
 1344     u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841;
 1345     u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78;
 1346     u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D;
 1347     u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B;
 1348     u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB;
 1349     u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867;
 1350     u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD;
 1351     u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A;
 1352     u32 x00, x01, x10, x11, x20, x21, x30, x31;
 1353
 1354     x0FF00FF0 = a4 ^ a5;
 1355     x3CC33CC3 = a3 ^ x0FF00FF0;
 1356     x00003CC3 = a6 & x3CC33CC3;
 1357     x0F000F00 = a4 & x0FF00FF0;
 1358     x5A555A55 = a2 ^ x0F000F00;
 1359     x00001841 = x00003CC3 & x5A555A55;
 1360
 1361     x00000F00 = a6 & x0F000F00;
 1362     x33333C33 = a3 ^ x00000F00;
 1363     x7B777E77 = x5A555A55 | x33333C33;
 1364     x0FF0F00F = a6 ^ x0FF00FF0;
 1365     x74878E78 = x7B777E77 ^ x0FF0F00F;
          // out4: a1 is mixed in here; the result is XORed into *out4
 1366     x30 = a1 & ~x00001841;
 1367     x31 = x30 ^ x74878E78;
 1368     *out4 ^= x31;
 1369
 1370     x003C003C = a5 & ~x3CC33CC3;
 1371     x5A7D5A7D = x5A555A55 | x003C003C;
 1372     x333300F0 = x00003CC3 ^ x33333C33;
 1373     x694E5A8D = x5A7D5A7D ^ x333300F0;
 1374
 1375     x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
 1376     x000F0303 = a4 & ~x0FF0CCCC;
 1377     x5A505854 = x5A555A55 & ~x000F0303;
 1378     x33CC000F = a5 ^ x333300F0;
 1379     x699C585B = x5A505854 ^ x33CC000F;
 1380
 1381     x7F878F78 = x0F000F00 | x74878E78;
 1382     x21101013 = a3 & x699C585B;
 1383     x7F979F7B = x7F878F78 | x21101013;
 1384     x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
 1385     x4F9493BB = x7F979F7B ^ x30030CC0;
          // out1: a1 is mixed in here; the result is XORed into *out1
 1386     x00 = x4F9493BB & ~a1;
 1387     x01 = x00 ^ x694E5A8D;
 1388     *out1 ^= x01;
 1389
 1390     x6F9CDBFB = x699C585B | x4F9493BB;
 1391     x0000DBFB = a6 & x6F9CDBFB;
 1392     x00005151 = a2 & x0000DBFB;
 1393     x26DAC936 = x694E5A8D ^ x4F9493BB;
 1394     x26DA9867 = x00005151 ^ x26DAC936;
 1395
 1396     x27DA9877 = x21101013 | x26DA9867;
 1397     x27DA438C = x0000DBFB ^ x27DA9877;
 1398     x2625C9C9 = a5 ^ x26DAC936;
 1399     x27FFCBCD = x27DA438C | x2625C9C9;
          // out3: a1 is mixed in here; the result is XORed into *out3
 1400     x20 = x27FFCBCD & a1;
 1401     x21 = x20 ^ x699C585B;
 1402     *out3 ^= x21;
 1403
 1404     x27FF1036 = x0000DBFB ^ x27FFCBCD;
 1405     x27FF103E = x003C003C | x27FF1036;
 1406     xB06B6C44 = ~x4F9493BB;
 1407     x97947C7A = x27FF103E ^ xB06B6C44;
          // out2: a1 is mixed in here; the result is XORed into *out2
 1408     x10 = x97947C7A & ~a1;
 1409     x11 = x10 ^ x26DA9867;
 1410     *out2 ^= x11;
 1411 }
 1412 
 /*
  * s8: S-box 8 of the bitslice DES S-box set described in the comment at the
  * start of this #if section, built only from bitwise AND / OR / XOR / NOT
  * operations on 32-bit words.
  *
  * a1..a6    : the six input words. Every operation below is bitwise, so each
  *             bit position is evaluated independently of the others.
  * out1..out4: the four result words are XORed into *out1..*out4; the
  *             previous contents are combined with the result, not replaced.
  *
  * Naming: the temporaries follow the convention of being named after the
  * value their expression takes for a1 = 0x55555555, a2 = 0x33333333,
  * a3 = 0x0F0F0F0F, a4 = 0x00FF00FF, a5 = 0x0000FFFF
  * (e.g. x0C0C0C0C = a3 & ~a2). a6 is used only in the four x?0 steps.
  */
 1413 DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
 1414 {
 1415     u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001;
 1416     u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745;
 1417     u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3;
 1418     u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A;
 1419     u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926;
 1420     u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F;
 1421     u32 xF700A600, x61008000, x03B7856B, x62B7056B;
 1422     u32 x00, x01, x10, x11, x20, x21, x30, x31;
 1423
 1424     x0C0C0C0C = a3 & ~a2;
 1425     x0000F0F0 = a5 & ~a3;
 1426     x00FFF00F = a4 ^ x0000F0F0;
 1427     x00555005 = a1 & x00FFF00F;
 1428     x00515001 = x00555005 & ~x0C0C0C0C;
 1429
 1430     x33000330 = a2 & ~x00FFF00F;
 1431     x77555775 = a1 | x33000330;
 1432     x30303030 = a2 & ~a3;
 1433     x3030CFCF = a5 ^ x30303030;
 1434     x30104745 = x77555775 & x3030CFCF;
 1435     x30555745 = x00555005 | x30104745;
 1436
 1437     xFF000FF0 = ~x00FFF00F;
 1438     xCF1048B5 = x30104745 ^ xFF000FF0;
 1439     x080A080A = a3 & ~x77555775;
 1440     xC71A40BF = xCF1048B5 ^ x080A080A;
 1441     xCB164CB3 = x0C0C0C0C ^ xC71A40BF;
          // out2: a6 is mixed in here; the result is XORed into *out2
 1442     x10 = x00515001 | a6;
 1443     x11 = x10 ^ xCB164CB3;
 1444     *out2 ^= x11;
 1445
 1446     x9E4319E6 = a1 ^ xCB164CB3;
 1447     x000019E6 = a5 & x9E4319E6;
 1448     xF429738C = a2 ^ xC71A40BF;
 1449     xF4296A6A = x000019E6 ^ xF429738C;
 1450     xC729695A = x33000330 ^ xF4296A6A;
 1451
 1452     xC47C3D2F = x30555745 ^ xF4296A6A;
 1453     xF77F3F3F = a2 | xC47C3D2F;
 1454     x9E43E619 = a5 ^ x9E4319E6;
 1455     x693CD926 = xF77F3F3F ^ x9E43E619;
          // out3: a6 is mixed in here; the result is XORed into *out3
 1456     x20 = x30555745 & a6;
 1457     x21 = x20 ^ x693CD926;
 1458     *out3 ^= x21;
 1459
 1460     xF719A695 = x3030CFCF ^ xC729695A;
 1461     xF4FF73FF = a4 | xF429738C;
 1462     x03E6D56A = xF719A695 ^ xF4FF73FF;
 1463     x56B3803F = a1 ^ x03E6D56A;
          // out4: a6 is mixed in here; the result is XORed into *out4
 1464     x30 = x56B3803F & a6;
 1465     x31 = x30 ^ xC729695A;
 1466     *out4 ^= x31;
 1467
 1468     xF700A600 = xF719A695 & ~a4;
 1469     x61008000 = x693CD926 & xF700A600;
 1470     x03B7856B = x00515001 ^ x03E6D56A;
 1471     x62B7056B = x61008000 ^ x03B7856B;
          // out1: a6 is mixed in here; the result is XORed into *out1
 1472     x00 = x62B7056B | a6;
 1473     x01 = x00 ^ xC729695A;
 1474     *out1 ^= x01;
 1475 }
 1476 
 1477 #endif
 1478 
 /*
  * SWAP(a, b) - exchange the two u32 values that the pointer expressions
  * `a` and `b` refer to.
  *
  * Each argument is expanded twice, so pass side-effect-free expressions only.
  * The arguments are parenthesized so that compound pointer expressions such
  * as `p + 1` are dereferenced as a whole. The expansion is a brace-enclosed
  * block with its own local `tmp`; do not pass an expression that itself uses
  * a variable named `tmp`.
  */
 #define SWAP(a, b) { u32 tmp = *(a); *(a) = *(b); *(b) = tmp; }
 1481 
 /*
  * DATASWAP - exchange the value behind every Dnn with the value behind
  * D(nn+32), for nn = 00..31.
  *
  * Expects SWAP and the 64 pointers D00..D63 to be in scope at the point of
  * use. The expansion is a single brace-enclosed block (like SWAP and the
  * KEYSETrn macros), so the 32 exchanges always stay together when the macro
  * is used as the body of an `if` or a loop. Applying it twice restores the
  * original arrangement.
  */
 #define DATASWAP     \
 {                    \
   SWAP (D00, D32);   \
   SWAP (D01, D33);   \
   SWAP (D02, D34);   \
   SWAP (D03, D35);   \
   SWAP (D04, D36);   \
   SWAP (D05, D37);   \
   SWAP (D06, D38);   \
   SWAP (D07, D39);   \
   SWAP (D08, D40);   \
   SWAP (D09, D41);   \
   SWAP (D10, D42);   \
   SWAP (D11, D43);   \
   SWAP (D12, D44);   \
   SWAP (D13, D45);   \
   SWAP (D14, D46);   \
   SWAP (D15, D47);   \
   SWAP (D16, D48);   \
   SWAP (D17, D49);   \
   SWAP (D18, D50);   \
   SWAP (D19, D51);   \
   SWAP (D20, D52);   \
   SWAP (D21, D53);   \
   SWAP (D22, D54);   \
   SWAP (D23, D55);   \
   SWAP (D24, D56);   \
   SWAP (D25, D57);   \
   SWAP (D26, D58);   \
   SWAP (D27, D59);   \
   SWAP (D28, D60);   \
   SWAP (D29, D61);   \
   SWAP (D30, D62);   \
   SWAP (D31, D63);   \
 }
 1515 
 /*
  * KEYSETrn - load the 48 working key words k00..k47 for one step of DESCrypt
  * by copying a fixed selection of the 56 inputs K00..K55.
  *
  * Naming: `r` is the value of DESCrypt's inner loop counter `i` (0 or 1) and
  * `n` (0..7) is the position of the step inside that loop body, so the
  * sixteen macros are consumed in the order 00..07 followed by 10..17.
  * Each macro expands to a brace-enclosed block and expects k00..k47 and
  * K00..K55 to be in scope at the point of use.
  *
  * NOTE(review): the tables look like the per-round DES subkey selection;
  * confirm against the DES key schedule before editing any entry.
  */
 1516 #define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; }
 1517 #define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; }
 1518 #define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; }
 1519 #define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; }
 1520 #define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; }
 1521 #define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; }
 1522 #define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; }
 1523 #define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; }
 1524 #define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; }
 1525 #define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; }
 1526 #define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; }
 1527 #define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; }
 1528 #define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; }
 1529 #define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; }
 1530 #define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; }
 1531 #define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; }
 1532 
 /*
  * myselx(lo, hi, sel) - two-way selector: yields `hi` when `sel` is non-zero
  * and `lo` otherwise. Only the chosen operand is evaluated.
  */
 #define myselx(lo, hi, sel) ((sel) ? (hi) : (lo))
 1534 
 1535 #ifdef DESCRYPT_SALT
 1536 
 /**
  * DESCrypt - variant compiled when DESCRYPT_SALT is defined.
  *
  * Runs 25 outer iterations. Each one executes the inner loop twice, and every
  * inner pass performs 8 steps (load a key set, then call s1..s8), i.e. 16
  * steps per outer iteration. The 64 words behind D00..D63 are updated in
  * place. Per the S-box comments earlier in this file the state is presumably
  * bitsliced DES data - confirm against the callers.
  *
  * SALT      not referenced in this variant; the twelve salt flags come from
  *           the DESCRYPT_SALT macro instead. The parameter is presumably kept
  *           so that both variants share one signature.
  * K00..K55  the 56 key words; the KEYSETrn macros copy 48 of them into
  *           k00..k47 before each step.
  * D00..D63  pointers to the 64 data words (read and written).
  */
 1537 DECLSPEC void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, PRIVATE_AS u32 *D00, PRIVATE_AS u32 *D01, PRIVATE_AS u32 *D02, PRIVATE_AS u32 *D03, PRIVATE_AS u32 *D04, PRIVATE_AS u32 *D05, PRIVATE_AS u32 *D06, PRIVATE_AS u32 *D07, PRIVATE_AS u32 *D08, PRIVATE_AS u32 *D09, PRIVATE_AS u32 *D10, PRIVATE_AS u32 *D11, PRIVATE_AS u32 *D12, PRIVATE_AS u32 *D13, PRIVATE_AS u32 *D14, PRIVATE_AS u32 *D15, PRIVATE_AS u32 *D16, PRIVATE_AS u32 *D17, PRIVATE_AS u32 *D18, PRIVATE_AS u32 *D19, PRIVATE_AS u32 *D20, PRIVATE_AS u32 *D21, PRIVATE_AS u32 *D22, PRIVATE_AS u32 *D23, PRIVATE_AS u32 *D24, PRIVATE_AS u32 *D25, PRIVATE_AS u32 *D26, PRIVATE_AS u32 *D27, PRIVATE_AS u32 *D28, PRIVATE_AS u32 *D29, PRIVATE_AS u32 *D30, PRIVATE_AS u32 *D31, PRIVATE_AS u32 *D32, PRIVATE_AS u32 *D33, PRIVATE_AS u32 *D34, PRIVATE_AS u32 *D35, PRIVATE_AS u32 *D36, PRIVATE_AS u32 *D37, PRIVATE_AS u32 *D38, PRIVATE_AS u32 *D39, PRIVATE_AS u32 *D40, PRIVATE_AS u32 *D41, PRIVATE_AS u32 *D42, PRIVATE_AS u32 *D43, PRIVATE_AS u32 *D44, PRIVATE_AS u32 *D45, PRIVATE_AS u32 *D46, PRIVATE_AS u32 *D47, PRIVATE_AS u32 *D48, PRIVATE_AS u32 *D49, PRIVATE_AS u32 *D50, PRIVATE_AS u32 *D51, PRIVATE_AS u32 *D52, 
PRIVATE_AS u32 *D53, PRIVATE_AS u32 *D54, PRIVATE_AS u32 *D55, PRIVATE_AS u32 *D56, PRIVATE_AS u32 *D57, PRIVATE_AS u32 *D58, PRIVATE_AS u32 *D59, PRIVATE_AS u32 *D60, PRIVATE_AS u32 *D61, PRIVATE_AS u32 *D62, PRIVATE_AS u32 *D63)
 1538 {
        // One flag per salt bit (0x001..0x800): 1 when the bit is set in DESCRYPT_SALT, else 0.
 1539   const u32 s001 = (0x001 & DESCRYPT_SALT) ? 1 : 0;
 1540   const u32 s002 = (0x002 & DESCRYPT_SALT) ? 1 : 0;
 1541   const u32 s004 = (0x004 & DESCRYPT_SALT) ? 1 : 0;
 1542   const u32 s008 = (0x008 & DESCRYPT_SALT) ? 1 : 0;
 1543   const u32 s010 = (0x010 & DESCRYPT_SALT) ? 1 : 0;
 1544   const u32 s020 = (0x020 & DESCRYPT_SALT) ? 1 : 0;
 1545   const u32 s040 = (0x040 & DESCRYPT_SALT) ? 1 : 0;
 1546   const u32 s080 = (0x080 & DESCRYPT_SALT) ? 1 : 0;
 1547   const u32 s100 = (0x100 & DESCRYPT_SALT) ? 1 : 0;
 1548   const u32 s200 = (0x200 & DESCRYPT_SALT) ? 1 : 0;
 1549   const u32 s400 = (0x400 & DESCRYPT_SALT) ? 1 : 0;
 1550   const u32 s800 = (0x800 & DESCRYPT_SALT) ? 1 : 0;
 1551 
        // Working key words for the current step; assigned by the KEYSETrn macros.
 1552   KXX_DECL u32 k00, k01, k02, k03, k04, k05;
 1553   KXX_DECL u32 k06, k07, k08, k09, k10, k11;
 1554   KXX_DECL u32 k12, k13, k14, k15, k16, k17;
 1555   KXX_DECL u32 k18, k19, k20, k21, k22, k23;
 1556   KXX_DECL u32 k24, k25, k26, k27, k28, k29;
 1557   KXX_DECL u32 k30, k31, k32, k33, k34, k35;
 1558   KXX_DECL u32 k36, k37, k38, k39, k40, k41;
 1559   KXX_DECL u32 k42, k43, k44, k45, k46, k47;
 1560 
 1561   for (u32 ii = 0; ii < 25; ii++)
 1562   {
          // Request unrolling of the two-pass loop only when _unroll is defined.
 1563     #ifdef _unroll
 1564     #pragma unroll
 1565     #endif
 1566     for (u32 i = 0; i < 2; i++)
 1567     {
            // i picks the key-set family: KEYSET0n on the first pass, KEYSET1n on the second.
 1568       if (i) KEYSET10 else KEYSET00
 1569 
            // Step form A: every input is read from D32..D63 and the four output
            // pointers of each S-box call lie in D00..D31 (the visible s8 XORs its
            // results into them; s1..s7 presumably do the same).
            // When a salt flag is set, myselx makes the matching inputs of s1/s5
            // (flags 0x001..0x020) or s2/s6 (flags 0x040..0x800) trade places.
 1570       s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
 1571       s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
 1572       s3(              *D39        ^ k12,               *D40        ^ k13,               *D41        ^ k14,               *D42        ^ k15,               *D43        ^ k16,               *D44        ^ k17, D23, D15, D29, D05);
 1573       s4(              *D43        ^ k18,               *D44        ^ k19,               *D45        ^ k20,               *D46        ^ k21,               *D47        ^ k22,               *D48        ^ k23, D25, D19, D09, D00);
 1574       s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
 1575       s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
 1576       s7(              *D55        ^ k36,               *D56        ^ k37,               *D57        ^ k38,               *D58        ^ k39,               *D59        ^ k40,               *D60        ^ k41, D31, D11, D21, D06);
 1577       s8(              *D59        ^ k42,               *D60        ^ k43,               *D61        ^ k44,               *D62        ^ k45,               *D63        ^ k46,               *D32        ^ k47, D04, D26, D14, D20);
 1578 
 1579       if (i) KEYSET11 else KEYSET01
 1580 
            // Step form B: the mirror image of form A - inputs come from D00..D31
            // and the output pointers lie in D32..D63.
            // The remaining six steps alternate between form A and form B.
 1581       s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
 1582       s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
 1583       s3(              *D07        ^ k12,               *D08        ^ k13,               *D09        ^ k14,               *D10        ^ k15,               *D11        ^ k16,               *D12        ^ k17, D55, D47, D61, D37);
 1584       s4(              *D11        ^ k18,               *D12        ^ k19,               *D13        ^ k20,               *D14        ^ k21,               *D15        ^ k22,               *D16        ^ k23, D57, D51, D41, D32);
 1585       s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
 1586       s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
 1587       s7(              *D23        ^ k36,               *D24        ^ k37,               *D25        ^ k38,               *D26        ^ k39,               *D27        ^ k40,               *D28        ^ k41, D63, D43, D53, D38);
 1588       s8(              *D27        ^ k42,               *D28        ^ k43,               *D29        ^ k44,               *D30        ^ k45,               *D31        ^ k46,               *D00        ^ k47, D36, D58, D46, D52);
 1589 
 1590       if (i) KEYSET12 else KEYSET02
 1591 
 1592       s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
 1593       s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
 1594       s3(              *D39        ^ k12,               *D40        ^ k13,               *D41        ^ k14,               *D42        ^ k15,               *D43        ^ k16,               *D44        ^ k17, D23, D15, D29, D05);
 1595       s4(              *D43        ^ k18,               *D44        ^ k19,               *D45        ^ k20,               *D46        ^ k21,               *D47        ^ k22,               *D48        ^ k23, D25, D19, D09, D00);
 1596       s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
 1597       s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
 1598       s7(              *D55        ^ k36,               *D56        ^ k37,               *D57        ^ k38,               *D58        ^ k39,               *D59        ^ k40,               *D60        ^ k41, D31, D11, D21, D06);
 1599       s8(              *D59        ^ k42,               *D60        ^ k43,               *D61        ^ k44,               *D62        ^ k45,               *D63        ^ k46,               *D32        ^ k47, D04, D26, D14, D20);
 1600 
 1601       if (i) KEYSET13 else KEYSET03
 1602 
 1603       s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
 1604       s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
 1605       s3(              *D07        ^ k12,               *D08        ^ k13,               *D09        ^ k14,               *D10        ^ k15,               *D11        ^ k16,               *D12        ^ k17, D55, D47, D61, D37);
 1606       s4(              *D11        ^ k18,               *D12        ^ k19,               *D13        ^ k20,               *D14        ^ k21,               *D15        ^ k22,               *D16        ^ k23, D57, D51, D41, D32);
 1607       s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
 1608       s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
 1609       s7(              *D23        ^ k36,               *D24        ^ k37,               *D25        ^ k38,               *D26        ^ k39,               *D27        ^ k40,               *D28        ^ k41, D63, D43, D53, D38);
 1610       s8(              *D27        ^ k42,               *D28        ^ k43,               *D29        ^ k44,               *D30        ^ k45,               *D31        ^ k46,               *D00        ^ k47, D36, D58, D46, D52);
 1611 
 1612       if (i) KEYSET14 else KEYSET04
 1613 
 1614       s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
 1615       s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
 1616       s3(              *D39        ^ k12,               *D40        ^ k13,               *D41        ^ k14,               *D42        ^ k15,               *D43        ^ k16,               *D44        ^ k17, D23, D15, D29, D05);
 1617       s4(              *D43        ^ k18,               *D44        ^ k19,               *D45        ^ k20,               *D46        ^ k21,               *D47        ^ k22,               *D48        ^ k23, D25, D19, D09, D00);
 1618       s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
 1619       s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
 1620       s7(              *D55        ^ k36,               *D56        ^ k37,               *D57        ^ k38,               *D58        ^ k39,               *D59        ^ k40,               *D60        ^ k41, D31, D11, D21, D06);
 1621       s8(              *D59        ^ k42,               *D60        ^ k43,               *D61        ^ k44,               *D62        ^ k45,               *D63        ^ k46,               *D32        ^ k47, D04, D26, D14, D20);
 1622 
 1623       if (i) KEYSET15 else KEYSET05
 1624 
 1625       s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
 1626       s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
 1627       s3(              *D07        ^ k12,               *D08        ^ k13,               *D09        ^ k14,               *D10        ^ k15,               *D11        ^ k16,               *D12        ^ k17, D55, D47, D61, D37);
 1628       s4(              *D11        ^ k18,               *D12        ^ k19,               *D13        ^ k20,               *D14        ^ k21,               *D15        ^ k22,               *D16        ^ k23, D57, D51, D41, D32);
 1629       s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
 1630       s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
 1631       s7(              *D23        ^ k36,               *D24        ^ k37,               *D25        ^ k38,               *D26        ^ k39,               *D27        ^ k40,               *D28        ^ k41, D63, D43, D53, D38);
 1632       s8(              *D27        ^ k42,               *D28        ^ k43,               *D29        ^ k44,               *D30        ^ k45,               *D31        ^ k46,               *D00        ^ k47, D36, D58, D46, D52);
 1633 
 1634       if (i) KEYSET16 else KEYSET06
 1635 
 1636       s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
 1637       s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
 1638       s3(              *D39        ^ k12,               *D40        ^ k13,               *D41        ^ k14,               *D42        ^ k15,               *D43        ^ k16,               *D44        ^ k17, D23, D15, D29, D05);
 1639       s4(              *D43        ^ k18,               *D44        ^ k19,               *D45        ^ k20,               *D46        ^ k21,               *D47        ^ k22,               *D48        ^ k23, D25, D19, D09, D00);
 1640       s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
 1641       s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
 1642       s7(              *D55        ^ k36,               *D56        ^ k37,               *D57        ^ k38,               *D58        ^ k39,               *D59        ^ k40,               *D60        ^ k41, D31, D11, D21, D06);
 1643       s8(              *D59        ^ k42,               *D60        ^ k43,               *D61        ^ k44,               *D62        ^ k45,               *D63        ^ k46,               *D32        ^ k47, D04, D26, D14, D20);
 1644 
 1645       if (i) KEYSET17 else KEYSET07
 1646 
 1647       s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
 1648       s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
 1649       s3(              *D07        ^ k12,               *D08        ^ k13,               *D09        ^ k14,               *D10        ^ k15,               *D11        ^ k16,               *D12        ^ k17, D55, D47, D61, D37);
 1650       s4(              *D11        ^ k18,               *D12        ^ k19,               *D13        ^ k20,               *D14        ^ k21,               *D15        ^ k22,               *D16        ^ k23, D57, D51, D41, D32);
 1651       s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
 1652       s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
 1653       s7(              *D23        ^ k36,               *D24        ^ k37,               *D25        ^ k38,               *D26        ^ k39,               *D27        ^ k40,               *D28        ^ k41, D63, D43, D53, D38);
 1654       s8(              *D27        ^ k42,               *D28        ^ k43,               *D29        ^ k44,               *D30        ^ k45,               *D31        ^ k46,               *D00        ^ k47, D36, D58, D46, D52);
 1655     }
 1656 
          // Exchange Dnn with D(nn+32) before the next outer iteration.
 1657     DATASWAP;
 1658   }
 1659 
        // A second exchange right after the loop cancels the one performed at the
        // end of the 25th iteration, so the words stay where the last step left them.
 1660   DATASWAP;
 1661 }
 1662 
 1663 #else
 1664 
 //
 // DESCrypt: bitslice DES core, operating in place on the 64 data words D00..D63.
 //
 // SALT     : only the low 12 bits are examined (masks 0x001..0x800). Each one
 //            is reduced to 0/1 and handed to myselx () as its third argument.
 // K00..K55 : the 56 key words. They are not referenced directly in this body;
 //            presumably the KEYSETxx macros (defined elsewhere in this file)
 //            pick 48 of them into k00..k47 for each round - TODO confirm.
 // D00..D63 : pointers to the data words. They are dereferenced to form the
 //            S-box inputs and passed on as the last four arguments of every
 //            s1 ()..s8 () call (presumably the words each S-box updates).
 //
 // Structure: 25 outer iterations; each runs 2 x 8 = 16 KEYSET/S-box groups and
 // then DATASWAP. One more DATASWAP follows the outer loop.
 // NOTE(review): 25 x 16 rounds with a 12-bit salt matches traditional DES
 // crypt(3) - confirm against the hash-mode documentation.
 //
 1665 DECLSPEC void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, PRIVATE_AS u32 *D00, PRIVATE_AS u32 *D01, PRIVATE_AS u32 *D02, PRIVATE_AS u32 *D03, PRIVATE_AS u32 *D04, PRIVATE_AS u32 *D05, PRIVATE_AS u32 *D06, PRIVATE_AS u32 *D07, PRIVATE_AS u32 *D08, PRIVATE_AS u32 *D09, PRIVATE_AS u32 *D10, PRIVATE_AS u32 *D11, PRIVATE_AS u32 *D12, PRIVATE_AS u32 *D13, PRIVATE_AS u32 *D14, PRIVATE_AS u32 *D15, PRIVATE_AS u32 *D16, PRIVATE_AS u32 *D17, PRIVATE_AS u32 *D18, PRIVATE_AS u32 *D19, PRIVATE_AS u32 *D20, PRIVATE_AS u32 *D21, PRIVATE_AS u32 *D22, PRIVATE_AS u32 *D23, PRIVATE_AS u32 *D24, PRIVATE_AS u32 *D25, PRIVATE_AS u32 *D26, PRIVATE_AS u32 *D27, PRIVATE_AS u32 *D28, PRIVATE_AS u32 *D29, PRIVATE_AS u32 *D30, PRIVATE_AS u32 *D31, PRIVATE_AS u32 *D32, PRIVATE_AS u32 *D33, PRIVATE_AS u32 *D34, PRIVATE_AS u32 *D35, PRIVATE_AS u32 *D36, PRIVATE_AS u32 *D37, PRIVATE_AS u32 *D38, PRIVATE_AS u32 *D39, PRIVATE_AS u32 *D40, PRIVATE_AS u32 *D41, PRIVATE_AS u32 *D42, PRIVATE_AS u32 *D43, PRIVATE_AS u32 *D44, PRIVATE_AS u32 *D45, PRIVATE_AS u32 *D46, PRIVATE_AS u32 *D47, PRIVATE_AS u32 *D48, PRIVATE_AS u32 *D49, PRIVATE_AS u32 *D50, PRIVATE_AS u32 *D51, PRIVATE_AS u32 *D52, 
PRIVATE_AS u32 *D53, PRIVATE_AS u32 *D54, PRIVATE_AS u32 *D55, PRIVATE_AS u32 *D56, PRIVATE_AS u32 *D57, PRIVATE_AS u32 *D58, PRIVATE_AS u32 *D59, PRIVATE_AS u32 *D60, PRIVATE_AS u32 *D61, PRIVATE_AS u32 *D62, PRIVATE_AS u32 *D63)
 1666 {
        // Split the low 12 salt bits into individual 0/1 flags. Each flag is the
        // selector argument of the myselx () calls feeding s1/s5 (s001..s020)
        // and s2/s6 (s040..s800).
 1667   const u32 s001 = (0x001 & SALT) ? 1 : 0;
 1668   const u32 s002 = (0x002 & SALT) ? 1 : 0;
 1669   const u32 s004 = (0x004 & SALT) ? 1 : 0;
 1670   const u32 s008 = (0x008 & SALT) ? 1 : 0;
 1671   const u32 s010 = (0x010 & SALT) ? 1 : 0;
 1672   const u32 s020 = (0x020 & SALT) ? 1 : 0;
 1673   const u32 s040 = (0x040 & SALT) ? 1 : 0;
 1674   const u32 s080 = (0x080 & SALT) ? 1 : 0;
 1675   const u32 s100 = (0x100 & SALT) ? 1 : 0;
 1676   const u32 s200 = (0x200 & SALT) ? 1 : 0;
 1677   const u32 s400 = (0x400 & SALT) ? 1 : 0;
 1678   const u32 s800 = (0x800 & SALT) ? 1 : 0;
 1679 
        // Round-key words, six per S-box. They are left uninitialised here;
        // the KEYSETxx macro in front of every group is expected to assign them.
        // KXX_DECL is defined empty near the top of the file for each vendor.
 1680   KXX_DECL u32 k00, k01, k02, k03, k04, k05;
 1681   KXX_DECL u32 k06, k07, k08, k09, k10, k11;
 1682   KXX_DECL u32 k12, k13, k14, k15, k16, k17;
 1683   KXX_DECL u32 k18, k19, k20, k21, k22, k23;
 1684   KXX_DECL u32 k24, k25, k26, k27, k28, k29;
 1685   KXX_DECL u32 k30, k31, k32, k33, k34, k35;
 1686   KXX_DECL u32 k36, k37, k38, k39, k40, k41;
 1687   KXX_DECL u32 k42, k43, k44, k45, k46, k47;
 1688 
        // 25 passes over the 16-group schedule below.
 1689   for (u32 ii = 0; ii < 25; ii++)
 1690   {
 1691     #ifdef _unroll
 1692     #pragma unroll
 1693     #endif
          // i selects the key-set family: KEYSET00..07 for i == 0 and
          // KEYSET10..17 for i == 1, giving 16 groups per outer iteration.
 1694     for (u32 i = 0; i < 2; i++)
 1695     {
            // Groups alternate between two fixed shapes:
            //  - inputs read from D32..D63, output pointers within D00..D31
            //  - inputs read from D00..D31, output pointers within D32..D63
            // In s1/s5 and s2/s6 the same salt flag is used with the two data
            // arguments of myselx () swapped; s3, s4, s7 and s8 take their
            // inputs without any salt selection.
 1696       if (i) KEYSET10 else KEYSET00
 1697 
 1698       s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
 1699       s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
 1700       s3(              *D39        ^ k12,               *D40        ^ k13,               *D41        ^ k14,               *D42        ^ k15,               *D43        ^ k16,               *D44        ^ k17, D23, D15, D29, D05);
 1701       s4(              *D43        ^ k18,               *D44        ^ k19,               *D45        ^ k20,               *D46        ^ k21,               *D47        ^ k22,               *D48        ^ k23, D25, D19, D09, D00);
 1702       s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
 1703       s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
 1704       s7(              *D55        ^ k36,               *D56        ^ k37,               *D57        ^ k38,               *D58        ^ k39,               *D59        ^ k40,               *D60        ^ k41, D31, D11, D21, D06);
 1705       s8(              *D59        ^ k42,               *D60        ^ k43,               *D61        ^ k44,               *D62        ^ k45,               *D63        ^ k46,               *D32        ^ k47, D04, D26, D14, D20);
 1706 
 1707       if (i) KEYSET11 else KEYSET01
 1708 
 1709       s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
 1710       s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
 1711       s3(              *D07        ^ k12,               *D08        ^ k13,               *D09        ^ k14,               *D10        ^ k15,               *D11        ^ k16,               *D12        ^ k17, D55, D47, D61, D37);
 1712       s4(              *D11        ^ k18,               *D12        ^ k19,               *D13        ^ k20,               *D14        ^ k21,               *D15        ^ k22,               *D16        ^ k23, D57, D51, D41, D32);
 1713       s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
 1714       s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
 1715       s7(              *D23        ^ k36,               *D24        ^ k37,               *D25        ^ k38,               *D26        ^ k39,               *D27        ^ k40,               *D28        ^ k41, D63, D43, D53, D38);
 1716       s8(              *D27        ^ k42,               *D28        ^ k43,               *D29        ^ k44,               *D30        ^ k45,               *D31        ^ k46,               *D00        ^ k47, D36, D58, D46, D52);
 1717 
 1718       if (i) KEYSET12 else KEYSET02
 1719 
 1720       s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
 1721       s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
 1722       s3(              *D39        ^ k12,               *D40        ^ k13,               *D41        ^ k14,               *D42        ^ k15,               *D43        ^ k16,               *D44        ^ k17, D23, D15, D29, D05);
 1723       s4(              *D43        ^ k18,               *D44        ^ k19,               *D45        ^ k20,               *D46        ^ k21,               *D47        ^ k22,               *D48        ^ k23, D25, D19, D09, D00);
 1724       s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
 1725       s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
 1726       s7(              *D55        ^ k36,               *D56        ^ k37,               *D57        ^ k38,               *D58        ^ k39,               *D59        ^ k40,               *D60        ^ k41, D31, D11, D21, D06);
 1727       s8(              *D59        ^ k42,               *D60        ^ k43,               *D61        ^ k44,               *D62        ^ k45,               *D63        ^ k46,               *D32        ^ k47, D04, D26, D14, D20);
 1728 
 1729       if (i) KEYSET13 else KEYSET03
 1730 
 1731       s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
 1732       s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
 1733       s3(              *D07        ^ k12,               *D08        ^ k13,               *D09        ^ k14,               *D10        ^ k15,               *D11        ^ k16,               *D12        ^ k17, D55, D47, D61, D37);
 1734       s4(              *D11        ^ k18,               *D12        ^ k19,               *D13        ^ k20,               *D14        ^ k21,               *D15        ^ k22,               *D16        ^ k23, D57, D51, D41, D32);
 1735       s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
 1736       s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
 1737       s7(              *D23        ^ k36,               *D24        ^ k37,               *D25        ^ k38,               *D26        ^ k39,               *D27        ^ k40,               *D28        ^ k41, D63, D43, D53, D38);
 1738       s8(              *D27        ^ k42,               *D28        ^ k43,               *D29        ^ k44,               *D30        ^ k45,               *D31        ^ k46,               *D00        ^ k47, D36, D58, D46, D52);
 1739 
 1740       if (i) KEYSET14 else KEYSET04
 1741 
 1742       s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
 1743       s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
 1744       s3(              *D39        ^ k12,               *D40        ^ k13,               *D41        ^ k14,               *D42        ^ k15,               *D43        ^ k16,               *D44        ^ k17, D23, D15, D29, D05);
 1745       s4(              *D43        ^ k18,               *D44        ^ k19,               *D45        ^ k20,               *D46        ^ k21,               *D47        ^ k22,               *D48        ^ k23, D25, D19, D09, D00);
 1746       s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
 1747       s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
 1748       s7(              *D55        ^ k36,               *D56        ^ k37,               *D57        ^ k38,               *D58        ^ k39,               *D59        ^ k40,               *D60        ^ k41, D31, D11, D21, D06);
 1749       s8(              *D59        ^ k42,               *D60        ^ k43,               *D61        ^ k44,               *D62        ^ k45,               *D63        ^ k46,               *D32        ^ k47, D04, D26, D14, D20);
 1750 
 1751       if (i) KEYSET15 else KEYSET05
 1752 
 1753       s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
 1754       s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
 1755       s3(              *D07        ^ k12,               *D08        ^ k13,               *D09        ^ k14,               *D10        ^ k15,               *D11        ^ k16,               *D12        ^ k17, D55, D47, D61, D37);
 1756       s4(              *D11        ^ k18,               *D12        ^ k19,               *D13        ^ k20,               *D14        ^ k21,               *D15        ^ k22,               *D16        ^ k23, D57, D51, D41, D32);
 1757       s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
 1758       s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
 1759       s7(              *D23        ^ k36,               *D24        ^ k37,               *D25        ^ k38,               *D26        ^ k39,               *D27        ^ k40,               *D28        ^ k41, D63, D43, D53, D38);
 1760       s8(              *D27        ^ k42,               *D28        ^ k43,               *D29        ^ k44,               *D30        ^ k45,               *D31        ^ k46,               *D00        ^ k47, D36, D58, D46, D52);
 1761 
 1762       if (i) KEYSET16 else KEYSET06
 1763 
 1764       s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
 1765       s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
 1766       s3(              *D39        ^ k12,               *D40        ^ k13,               *D41        ^ k14,               *D42        ^ k15,               *D43        ^ k16,               *D44        ^ k17, D23, D15, D29, D05);
 1767       s4(              *D43        ^ k18,               *D44        ^ k19,               *D45        ^ k20,               *D46        ^ k21,               *D47        ^ k22,               *D48        ^ k23, D25, D19, D09, D00);
 1768       s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
 1769       s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
 1770       s7(              *D55        ^ k36,               *D56        ^ k37,               *D57        ^ k38,               *D58        ^ k39,               *D59        ^ k40,               *D60        ^ k41, D31, D11, D21, D06);
 1771       s8(              *D59        ^ k42,               *D60        ^ k43,               *D61        ^ k44,               *D62        ^ k45,               *D63        ^ k46,               *D32        ^ k47, D04, D26, D14, D20);
 1772 
 1773       if (i) KEYSET17 else KEYSET07
 1774 
 1775       s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
 1776       s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
 1777       s3(              *D07        ^ k12,               *D08        ^ k13,               *D09        ^ k14,               *D10        ^ k15,               *D11        ^ k16,               *D12        ^ k17, D55, D47, D61, D37);
 1778       s4(              *D11        ^ k18,               *D12        ^ k19,               *D13        ^ k20,               *D14        ^ k21,               *D15        ^ k22,               *D16        ^ k23, D57, D51, D41, D32);
 1779       s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
 1780       s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
 1781       s7(              *D23        ^ k36,               *D24        ^ k37,               *D25        ^ k38,               *D26        ^ k39,               *D27        ^ k40,               *D28        ^ k41, D63, D43, D53, D38);
 1782       s8(              *D27        ^ k42,               *D28        ^ k43,               *D29        ^ k44,               *D30        ^ k45,               *D31        ^ k46,               *D00        ^ k47, D36, D58, D46, D52);
 1783     }
 1784 
          // DATASWAP is a macro defined elsewhere in this file; it runs once
          // after every block of 16 groups.
 1785     DATASWAP;
 1786   }
 1787 
        // One additional DATASWAP after the 25th pass.
        // NOTE(review): presumably this undoes the swap made at the end of the
        // last iteration - confirm against the macro definition.
 1788   DATASWAP;
 1789 }
 1790 
 1791 #endif
 1792 
 //
 // transpose32c: in-place bit-matrix transpose of the 32 words data[0..31],
 // treated as a 32 x 32 matrix of bits.
 //
 // It is built entirely from masked swaps between word pairs that are 16, 8,
 // 4, 2 and 1 positions apart. Every word pair of every distance is visited
 // exactly once; the statement order below works through the array block by
 // block rather than distance by distance.
 //
 // data: array of (at least) 32 u32 words, modified in place.
 //
 1793 DECLSPEC void transpose32c (PRIVATE_AS u32 *data)
 1794 {
        // swap (x, y, j, m): exchange the bits of x selected by mask m with the
        // bits of y that sit j positions higher. t holds the bits that differ.
        // The macro expands to three statements, uses the local `t` declared
        // just below, and is not #undef'd at the end of the function.
 1795   #define swap(x,y,j,m)               \
 1796      t  = ((x) ^ ((y) >> (j))) & (m); \
 1797     (x) = (x) ^ t;                    \
 1798     (y) = (y) ^ (t << (j));
 1799 
 1800   u32 t;
 1801 
        // distance 16: words 0..15 against words 16..31
 1802   swap (data[ 0], data[16], 16, 0x0000ffff);
 1803   swap (data[ 1], data[17], 16, 0x0000ffff);
 1804   swap (data[ 2], data[18], 16, 0x0000ffff);
 1805   swap (data[ 3], data[19], 16, 0x0000ffff);
 1806   swap (data[ 4], data[20], 16, 0x0000ffff);
 1807   swap (data[ 5], data[21], 16, 0x0000ffff);
 1808   swap (data[ 6], data[22], 16, 0x0000ffff);
 1809   swap (data[ 7], data[23], 16, 0x0000ffff);
 1810   swap (data[ 8], data[24], 16, 0x0000ffff);
 1811   swap (data[ 9], data[25], 16, 0x0000ffff);
 1812   swap (data[10], data[26], 16, 0x0000ffff);
 1813   swap (data[11], data[27], 16, 0x0000ffff);
 1814   swap (data[12], data[28], 16, 0x0000ffff);
 1815   swap (data[13], data[29], 16, 0x0000ffff);
 1816   swap (data[14], data[30], 16, 0x0000ffff);
 1817   swap (data[15], data[31], 16, 0x0000ffff);
        // lower half, distance 8: words 0..7 against words 8..15
 1818   swap (data[ 0], data[ 8],  8, 0x00ff00ff);
 1819   swap (data[ 1], data[ 9],  8, 0x00ff00ff);
 1820   swap (data[ 2], data[10],  8, 0x00ff00ff);
 1821   swap (data[ 3], data[11],  8, 0x00ff00ff);
 1822   swap (data[ 4], data[12],  8, 0x00ff00ff);
 1823   swap (data[ 5], data[13],  8, 0x00ff00ff);
 1824   swap (data[ 6], data[14],  8, 0x00ff00ff);
 1825   swap (data[ 7], data[15],  8, 0x00ff00ff);
        // words 0..7: distances 4, 2 and 1
 1826   swap (data[ 0], data[ 4],  4, 0x0f0f0f0f);
 1827   swap (data[ 1], data[ 5],  4, 0x0f0f0f0f);
 1828   swap (data[ 2], data[ 6],  4, 0x0f0f0f0f);
 1829   swap (data[ 3], data[ 7],  4, 0x0f0f0f0f);
 1830   swap (data[ 0], data[ 2],  2, 0x33333333);
 1831   swap (data[ 1], data[ 3],  2, 0x33333333);
 1832   swap (data[ 0], data[ 1],  1, 0x55555555);
 1833   swap (data[ 2], data[ 3],  1, 0x55555555);
 1834   swap (data[ 4], data[ 6],  2, 0x33333333);
 1835   swap (data[ 5], data[ 7],  2, 0x33333333);
 1836   swap (data[ 4], data[ 5],  1, 0x55555555);
 1837   swap (data[ 6], data[ 7],  1, 0x55555555);
        // words 8..15: distances 4, 2 and 1
 1838   swap (data[ 8], data[12],  4, 0x0f0f0f0f);
 1839   swap (data[ 9], data[13],  4, 0x0f0f0f0f);
 1840   swap (data[10], data[14],  4, 0x0f0f0f0f);
 1841   swap (data[11], data[15],  4, 0x0f0f0f0f);
 1842   swap (data[ 8], data[10],  2, 0x33333333);
 1843   swap (data[ 9], data[11],  2, 0x33333333);
 1844   swap (data[ 8], data[ 9],  1, 0x55555555);
 1845   swap (data[10], data[11],  1, 0x55555555);
 1846   swap (data[12], data[14],  2, 0x33333333);
 1847   swap (data[13], data[15],  2, 0x33333333);
 1848   swap (data[12], data[13],  1, 0x55555555);
 1849   swap (data[14], data[15],  1, 0x55555555);
        // upper half, distance 8: words 16..23 against words 24..31
 1850   swap (data[16], data[24],  8, 0x00ff00ff);
 1851   swap (data[17], data[25],  8, 0x00ff00ff);
 1852   swap (data[18], data[26],  8, 0x00ff00ff);
 1853   swap (data[19], data[27],  8, 0x00ff00ff);
 1854   swap (data[20], data[28],  8, 0x00ff00ff);
 1855   swap (data[21], data[29],  8, 0x00ff00ff);
 1856   swap (data[22], data[30],  8, 0x00ff00ff);
 1857   swap (data[23], data[31],  8, 0x00ff00ff);
        // words 16..23: distances 4, 2 and 1
 1858   swap (data[16], data[20],  4, 0x0f0f0f0f);
 1859   swap (data[17], data[21],  4, 0x0f0f0f0f);
 1860   swap (data[18], data[22],  4, 0x0f0f0f0f);
 1861   swap (data[19], data[23],  4, 0x0f0f0f0f);
 1862   swap (data[16], data[18],  2, 0x33333333);
 1863   swap (data[17], data[19],  2, 0x33333333);
 1864   swap (data[16], data[17],  1, 0x55555555);
 1865   swap (data[18], data[19],  1, 0x55555555);
 1866   swap (data[20], data[22],  2, 0x33333333);
 1867   swap (data[21], data[23],  2, 0x33333333);
 1868   swap (data[20], data[21],  1, 0x55555555);
 1869   swap (data[22], data[23],  1, 0x55555555);
        // words 24..31: distances 4, 2 and 1
 1870   swap (data[24], data[28],  4, 0x0f0f0f0f);
 1871   swap (data[25], data[29],  4, 0x0f0f0f0f);
 1872   swap (data[26], data[30],  4, 0x0f0f0f0f);
 1873   swap (data[27], data[31],  4, 0x0f0f0f0f);
 1874   swap (data[24], data[26],  2, 0x33333333);
 1875   swap (data[25], data[27],  2, 0x33333333);
 1876   swap (data[24], data[25],  1, 0x55555555);
 1877   swap (data[26], data[27],  1, 0x55555555);
 1878   swap (data[28], data[30],  2, 0x33333333);
 1879   swap (data[29], data[31],  2, 0x33333333);
 1880   swap (data[28], data[29],  1, 0x55555555);
 1881   swap (data[30], data[31],  1, 0x55555555);
 1882 }
 1883 
 1884 //
 1885 // transpose bitslice mod: beware of race conditions, *in and *out must be different buffers
 1886 //
 1887 
 //
 // m01500_tm: scatters one 32-bit word of `mod` per work-item into bitslice
 // form inside `words_buf_b`.
 //
 // Work-item gid reads mod[gid] and contributes bit number (gid % 32) to each
 // of the 28 words words_buf_b[gid / 32].b[0..27]: seven words per input byte,
 // taken from bits 6..0 of that byte, most significant first. Bit 7 of every
 // input byte is dropped.
 //
 // The kernel parameters come from the KERN_ATTR_TM macro (defined elsewhere).
 // NOTE(review): the writes are OR-only, so the destination words presumably
 // have to be zeroed by the caller beforehand - confirm against the host code.
 //
 1888 KERNEL_FQ void m01500_tm (KERN_ATTR_TM)
 1889 {
 1890   const u64 gid = get_global_id (0);
 1891 
        // The bounds check is disabled in the original source; every launched
        // work-item reads mod[gid].
 1892   // if (gid >= GID_CNT) return;
 1893 
        // 32 consecutive work-items share one output block; `slice` is the bit
        // position this work-item owns inside each word of that block.
 1894   const u32 block = gid / 32;
 1895   const u32 slice = gid % 32;
 1896 
 1897   const u32 w0 = mod[gid];
 1898 
        // Shift every byte left by one and clear bit 0 of each byte, so the low
        // seven bits of each original byte end up in bit positions 7..1.
 1899   const u32 w0s = (w0 << 1) & 0xfefefefe;
 1900 
 1901   #ifdef _unroll
 1902   #pragma unroll
 1903   #endif
        // i walks the four bytes of w0s (bit offsets 0, 8, 16, 24); j walks the
        // matching groups of seven output words (0, 7, 14, 21).
 1904   for (int i = 0, j = 0; i < 32; i += 8, j += 7)
 1905   {
          // Atomic OR because the 32 work-items of a block update the same words.
 1906     hc_atomic_or (&words_buf_b[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice));
 1907     hc_atomic_or (&words_buf_b[block].b[j + 1], (((w0s >> (i + 6)) & 1) << slice));
 1908     hc_atomic_or (&words_buf_b[block].b[j + 2], (((w0s >> (i + 5)) & 1) << slice));
 1909     hc_atomic_or (&words_buf_b[block].b[j + 3], (((w0s >> (i + 4)) & 1) << slice));
 1910     hc_atomic_or (&words_buf_b[block].b[j + 4], (((w0s >> (i + 3)) & 1) << slice));
 1911     hc_atomic_or (&words_buf_b[block].b[j + 5], (((w0s >> (i + 2)) & 1) << slice));
 1912     hc_atomic_or (&words_buf_b[block].b[j + 6], (((w0s >> (i + 1)) & 1) << slice));
 1913   }
 1914 }
 1915 
 //
 // m01500_mxx: multi-digest kernel. For the candidate stored in pws[gid] it
 // runs DESCrypt once per group of 32 inner-loop positions (one position per
 // bit lane) and compares the 64 result words against the target digests via
 // the textually included COMPARE_M file.
 //
 // The kernel parameters come from the KERN_ATTR_BITSLICE () macro (defined
 // elsewhere). The K00..K55 macros defined below refer to the locals w0s/w1s
 // and remain defined after this function ends.
 //
 1916 KERNEL_FQ void m01500_mxx (KERN_ATTR_BITSLICE ())
 1917 {
 1918   /**
 1919    * base
 1920    */
 1921 
 1922   const u64 gid = get_global_id (0);
 1923   const u64 lid = get_local_id (0);
 1924 
 1925   if (gid >= GID_CNT) return;
 1926 
 1927   /**
 1928    * salt
 1929    */
 1930 
 1931   const u32 salt = salt_bufs[SALT_POS_HOST].salt_buf[0];
 1932 
 1933   /**
 1934    * base
 1935    */
 1936 
        // First eight bytes of the candidate.
 1937   const u32 w0 = pws[gid].i[0];
 1938   const u32 w1 = pws[gid].i[1];
 1939 
        // Shift every byte left by one and clear bit 0 of each byte, so the low
        // seven bits of each original byte end up in bit positions 7..1.
 1940   const u32 w0s = (w0 << 1) & 0xfefefefe;
 1941   const u32 w1s = (w1 << 1) & 0xfefefefe;
 1942 
        // K00..K55: each expands to all-ones (-1) or 0 depending on one bit of
        // w0s/w1s, i.e. the same key bit replicated across all 32 lanes.
        // Seven macros per byte, most significant of the seven bits first.
 1943   #define K00 (((w0s >> ( 0 + 7)) & 1) ? -1 : 0)
 1944   #define K01 (((w0s >> ( 0 + 6)) & 1) ? -1 : 0)
 1945   #define K02 (((w0s >> ( 0 + 5)) & 1) ? -1 : 0)
 1946   #define K03 (((w0s >> ( 0 + 4)) & 1) ? -1 : 0)
 1947   #define K04 (((w0s >> ( 0 + 3)) & 1) ? -1 : 0)
 1948   #define K05 (((w0s >> ( 0 + 2)) & 1) ? -1 : 0)
 1949   #define K06 (((w0s >> ( 0 + 1)) & 1) ? -1 : 0)
 1950   #define K07 (((w0s >> ( 8 + 7)) & 1) ? -1 : 0)
 1951   #define K08 (((w0s >> ( 8 + 6)) & 1) ? -1 : 0)
 1952   #define K09 (((w0s >> ( 8 + 5)) & 1) ? -1 : 0)
 1953   #define K10 (((w0s >> ( 8 + 4)) & 1) ? -1 : 0)
 1954   #define K11 (((w0s >> ( 8 + 3)) & 1) ? -1 : 0)
 1955   #define K12 (((w0s >> ( 8 + 2)) & 1) ? -1 : 0)
 1956   #define K13 (((w0s >> ( 8 + 1)) & 1) ? -1 : 0)
 1957   #define K14 (((w0s >> (16 + 7)) & 1) ? -1 : 0)
 1958   #define K15 (((w0s >> (16 + 6)) & 1) ? -1 : 0)
 1959   #define K16 (((w0s >> (16 + 5)) & 1) ? -1 : 0)
 1960   #define K17 (((w0s >> (16 + 4)) & 1) ? -1 : 0)
 1961   #define K18 (((w0s >> (16 + 3)) & 1) ? -1 : 0)
 1962   #define K19 (((w0s >> (16 + 2)) & 1) ? -1 : 0)
 1963   #define K20 (((w0s >> (16 + 1)) & 1) ? -1 : 0)
 1964   #define K21 (((w0s >> (24 + 7)) & 1) ? -1 : 0)
 1965   #define K22 (((w0s >> (24 + 6)) & 1) ? -1 : 0)
 1966   #define K23 (((w0s >> (24 + 5)) & 1) ? -1 : 0)
 1967   #define K24 (((w0s >> (24 + 4)) & 1) ? -1 : 0)
 1968   #define K25 (((w0s >> (24 + 3)) & 1) ? -1 : 0)
 1969   #define K26 (((w0s >> (24 + 2)) & 1) ? -1 : 0)
 1970   #define K27 (((w0s >> (24 + 1)) & 1) ? -1 : 0)
 1971   #define K28 (((w1s >> ( 0 + 7)) & 1) ? -1 : 0)
 1972   #define K29 (((w1s >> ( 0 + 6)) & 1) ? -1 : 0)
 1973   #define K30 (((w1s >> ( 0 + 5)) & 1) ? -1 : 0)
 1974   #define K31 (((w1s >> ( 0 + 4)) & 1) ? -1 : 0)
 1975   #define K32 (((w1s >> ( 0 + 3)) & 1) ? -1 : 0)
 1976   #define K33 (((w1s >> ( 0 + 2)) & 1) ? -1 : 0)
 1977   #define K34 (((w1s >> ( 0 + 1)) & 1) ? -1 : 0)
 1978   #define K35 (((w1s >> ( 8 + 7)) & 1) ? -1 : 0)
 1979   #define K36 (((w1s >> ( 8 + 6)) & 1) ? -1 : 0)
 1980   #define K37 (((w1s >> ( 8 + 5)) & 1) ? -1 : 0)
 1981   #define K38 (((w1s >> ( 8 + 4)) & 1) ? -1 : 0)
 1982   #define K39 (((w1s >> ( 8 + 3)) & 1) ? -1 : 0)
 1983   #define K40 (((w1s >> ( 8 + 2)) & 1) ? -1 : 0)
 1984   #define K41 (((w1s >> ( 8 + 1)) & 1) ? -1 : 0)
 1985   #define K42 (((w1s >> (16 + 7)) & 1) ? -1 : 0)
 1986   #define K43 (((w1s >> (16 + 6)) & 1) ? -1 : 0)
 1987   #define K44 (((w1s >> (16 + 5)) & 1) ? -1 : 0)
 1988   #define K45 (((w1s >> (16 + 4)) & 1) ? -1 : 0)
 1989   #define K46 (((w1s >> (16 + 3)) & 1) ? -1 : 0)
 1990   #define K47 (((w1s >> (16 + 2)) & 1) ? -1 : 0)
 1991   #define K48 (((w1s >> (16 + 1)) & 1) ? -1 : 0)
 1992   #define K49 (((w1s >> (24 + 7)) & 1) ? -1 : 0)
 1993   #define K50 (((w1s >> (24 + 6)) & 1) ? -1 : 0)
 1994   #define K51 (((w1s >> (24 + 5)) & 1) ? -1 : 0)
 1995   #define K52 (((w1s >> (24 + 4)) & 1) ? -1 : 0)
 1996   #define K53 (((w1s >> (24 + 3)) & 1) ? -1 : 0)
 1997   #define K54 (((w1s >> (24 + 2)) & 1) ? -1 : 0)
 1998   #define K55 (((w1s >> (24 + 1)) & 1) ? -1 : 0)
 1999 
 2000   /**
 2001    * inner loop
 2002    */
 2003 
        // il_pos advances in steps of 32: one DESCrypt call covers 32 positions.
 2004   for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += 32)
 2005   {
          // Key words derived from the first four candidate bytes (K00..K27)
          // start from the replicated base value ...
 2006     u32 k00 = K00;
 2007     u32 k01 = K01;
 2008     u32 k02 = K02;
 2009     u32 k03 = K03;
 2010     u32 k04 = K04;
 2011     u32 k05 = K05;
 2012     u32 k06 = K06;
 2013     u32 k07 = K07;
 2014     u32 k08 = K08;
 2015     u32 k09 = K09;
 2016     u32 k10 = K10;
 2017     u32 k11 = K11;
 2018     u32 k12 = K12;
 2019     u32 k13 = K13;
 2020     u32 k14 = K14;
 2021     u32 k15 = K15;
 2022     u32 k16 = K16;
 2023     u32 k17 = K17;
 2024     u32 k18 = K18;
 2025     u32 k19 = K19;
 2026     u32 k20 = K20;
 2027     u32 k21 = K21;
 2028     u32 k22 = K22;
 2029     u32 k23 = K23;
 2030     u32 k24 = K24;
 2031     u32 k25 = K25;
 2032     u32 k26 = K26;
 2033     u32 k27 = K27;
 2034 
          // ... and are then OR-ed with the 28 bitslice words stored for this
          // group of 32 positions in words_buf_s.
 2035     const u32 pc_pos = il_pos / 32;
 2036 
 2037     k00 |= words_buf_s[pc_pos].b[ 0];
 2038     k01 |= words_buf_s[pc_pos].b[ 1];
 2039     k02 |= words_buf_s[pc_pos].b[ 2];
 2040     k03 |= words_buf_s[pc_pos].b[ 3];
 2041     k04 |= words_buf_s[pc_pos].b[ 4];
 2042     k05 |= words_buf_s[pc_pos].b[ 5];
 2043     k06 |= words_buf_s[pc_pos].b[ 6];
 2044     k07 |= words_buf_s[pc_pos].b[ 7];
 2045     k08 |= words_buf_s[pc_pos].b[ 8];
 2046     k09 |= words_buf_s[pc_pos].b[ 9];
 2047     k10 |= words_buf_s[pc_pos].b[10];
 2048     k11 |= words_buf_s[pc_pos].b[11];
 2049     k12 |= words_buf_s[pc_pos].b[12];
 2050     k13 |= words_buf_s[pc_pos].b[13];
 2051     k14 |= words_buf_s[pc_pos].b[14];
 2052     k15 |= words_buf_s[pc_pos].b[15];
 2053     k16 |= words_buf_s[pc_pos].b[16];
 2054     k17 |= words_buf_s[pc_pos].b[17];
 2055     k18 |= words_buf_s[pc_pos].b[18];
 2056     k19 |= words_buf_s[pc_pos].b[19];
 2057     k20 |= words_buf_s[pc_pos].b[20];
 2058     k21 |= words_buf_s[pc_pos].b[21];
 2059     k22 |= words_buf_s[pc_pos].b[22];
 2060     k23 |= words_buf_s[pc_pos].b[23];
 2061     k24 |= words_buf_s[pc_pos].b[24];
 2062     k25 |= words_buf_s[pc_pos].b[25];
 2063     k26 |= words_buf_s[pc_pos].b[26];
 2064     k27 |= words_buf_s[pc_pos].b[27];
 2065 
          // The 64 data words start out as zero in every lane.
 2066     u32 D00 = 0;
 2067     u32 D01 = 0;
 2068     u32 D02 = 0;
 2069     u32 D03 = 0;
 2070     u32 D04 = 0;
 2071     u32 D05 = 0;
 2072     u32 D06 = 0;
 2073     u32 D07 = 0;
 2074     u32 D08 = 0;
 2075     u32 D09 = 0;
 2076     u32 D10 = 0;
 2077     u32 D11 = 0;
 2078     u32 D12 = 0;
 2079     u32 D13 = 0;
 2080     u32 D14 = 0;
 2081     u32 D15 = 0;
 2082     u32 D16 = 0;
 2083     u32 D17 = 0;
 2084     u32 D18 = 0;
 2085     u32 D19 = 0;
 2086     u32 D20 = 0;
 2087     u32 D21 = 0;
 2088     u32 D22 = 0;
 2089     u32 D23 = 0;
 2090     u32 D24 = 0;
 2091     u32 D25 = 0;
 2092     u32 D26 = 0;
 2093     u32 D27 = 0;
 2094     u32 D28 = 0;
 2095     u32 D29 = 0;
 2096     u32 D30 = 0;
 2097     u32 D31 = 0;
 2098     u32 D32 = 0;
 2099     u32 D33 = 0;
 2100     u32 D34 = 0;
 2101     u32 D35 = 0;
 2102     u32 D36 = 0;
 2103     u32 D37 = 0;
 2104     u32 D38 = 0;
 2105     u32 D39 = 0;
 2106     u32 D40 = 0;
 2107     u32 D41 = 0;
 2108     u32 D42 = 0;
 2109     u32 D43 = 0;
 2110     u32 D44 = 0;
 2111     u32 D45 = 0;
 2112     u32 D46 = 0;
 2113     u32 D47 = 0;
 2114     u32 D48 = 0;
 2115     u32 D49 = 0;
 2116     u32 D50 = 0;
 2117     u32 D51 = 0;
 2118     u32 D52 = 0;
 2119     u32 D53 = 0;
 2120     u32 D54 = 0;
 2121     u32 D55 = 0;
 2122     u32 D56 = 0;
 2123     u32 D57 = 0;
 2124     u32 D58 = 0;
 2125     u32 D59 = 0;
 2126     u32 D60 = 0;
 2127     u32 D61 = 0;
 2128     u32 D62 = 0;
 2129     u32 D63 = 0;
 2130 
          // k00..k27 are the per-lane words built above; K28..K55 are passed as
          // the macros themselves, i.e. identical in every lane.
 2131     DESCrypt
 2132     (
 2133       salt,
 2134       k00, k01, k02, k03, k04, k05, k06,
 2135       k07, k08, k09, k10, k11, k12, k13,
 2136       k14, k15, k16, k17, k18, k19, k20,
 2137       k21, k22, k23, k24, k25, k26, k27,
 2138       K28, K29, K30, K31, K32, K33, K34,
 2139       K35, K36, K37, K38, K39, K40, K41,
 2140       K42, K43, K44, K45, K46, K47, K48,
 2141       K49, K50, K51, K52, K53, K54, K55,
 2142       &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
 2143       &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
 2144       &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
 2145       &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
 2146       &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
 2147       &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
 2148       &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
 2149       &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
 2150     );
 2151 
          // Gather the result words into an array so they can be indexed.
 2152     u32 out[64];
 2153 
 2154     out[ 0] = D00;
 2155     out[ 1] = D01;
 2156     out[ 2] = D02;
 2157     out[ 3] = D03;
 2158     out[ 4] = D04;
 2159     out[ 5] = D05;
 2160     out[ 6] = D06;
 2161     out[ 7] = D07;
 2162     out[ 8] = D08;
 2163     out[ 9] = D09;
 2164     out[10] = D10;
 2165     out[11] = D11;
 2166     out[12] = D12;
 2167     out[13] = D13;
 2168     out[14] = D14;
 2169     out[15] = D15;
 2170     out[16] = D16;
 2171     out[17] = D17;
 2172     out[18] = D18;
 2173     out[19] = D19;
 2174     out[20] = D20;
 2175     out[21] = D21;
 2176     out[22] = D22;
 2177     out[23] = D23;
 2178     out[24] = D24;
 2179     out[25] = D25;
 2180     out[26] = D26;
 2181     out[27] = D27;
 2182     out[28] = D28;
 2183     out[29] = D29;
 2184     out[30] = D30;
 2185     out[31] = D31;
 2186     out[32] = D32;
 2187     out[33] = D33;
 2188     out[34] = D34;
 2189     out[35] = D35;
 2190     out[36] = D36;
 2191     out[37] = D37;
 2192     out[38] = D38;
 2193     out[39] = D39;
 2194     out[40] = D40;
 2195     out[41] = D41;
 2196     out[42] = D42;
 2197     out[43] = D43;
 2198     out[44] = D44;
 2199     out[45] = D45;
 2200     out[46] = D46;
 2201     out[47] = D47;
 2202     out[48] = D48;
 2203     out[49] = D49;
 2204     out[50] = D50;
 2205     out[51] = D51;
 2206     out[52] = D52;
 2207     out[53] = D53;
 2208     out[54] = D54;
 2209     out[55] = D55;
 2210     out[56] = D56;
 2211     out[57] = D57;
 2212     out[58] = D58;
 2213     out[59] = D59;
 2214     out[60] = D60;
 2215     out[61] = D61;
 2216     out[62] = D62;
 2217     out[63] = D63;
 2218 
          // Few digests: compare each digest against all 32 lanes while the
          // result is still in bitslice form.
 2219     if (DIGESTS_CNT < 16)
 2220     {
 2221       for (u32 d = 0; d < DIGESTS_CNT; d++)
 2222       {
 2223         const u32 final_hash_pos = DIGESTS_OFFSET_HOST + d;
 2224 
 2225         if (hashes_shown[final_hash_pos]) continue;
 2226 
 2227         u32 search[2];
 2228 
 2229         search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0];
 2230         search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1];
 2231 
              // A set bit in tmpResult marks a lane in which at least one of the
              // 64 result bits differs from the digest.
 2232         u32 tmpResult = 0;
 2233 
 2234         #pragma unroll
 2235         for (int i = 0; i < 32; i++)
 2236         {
                // Replicate digest bit i across all lanes (0 or all-ones).
 2237           const u32 b0 = -((search[0] >> i) & 1);
 2238           const u32 b1 = -((search[1] >> i) & 1);
 2239 
 2240           tmpResult |= out[ 0 + i] ^ b0;
 2241           tmpResult |= out[32 + i] ^ b1;
 2242         }
 2243 
              // Every lane differs somewhere: no match for this digest.
 2244         if (tmpResult == 0xffffffff) continue;
 2245 
              // ffz () is defined elsewhere; presumably it returns the index of
              // the first zero bit, i.e. a matching lane - TODO confirm. Only
              // that single lane is reported for this digest.
 2246         const u32 slice = ffz (tmpResult);
 2247 
 2248         const u32 r0 = search[0];
 2249         const u32 r1 = search[1];
 2250         #ifdef KERNEL_STATIC
 2251         const u32 r2 = 0;
 2252         const u32 r3 = 0;
 2253         #endif
 2254 
              // COMPARE_M is defined at the top of the file as the path of
              // inc_comp_multi_bs.cl; the file is pasted in here and presumably
              // uses the locals declared above (r0..r3, slice, ...).
 2255         #include COMPARE_M
 2256       }
 2257     }
 2258     else
 2259     {
            // Many digests: turn the bitslice result back into one (r0, r1)
            // pair per lane and hand every lane to COMPARE_M.
 2260       u32 out0[32];
 2261       u32 out1[32];
 2262 
 2263       #pragma unroll
 2264       for (int i = 0; i < 32; i++)
 2265       {
              // Copy each half in reversed word order before transposing.
 2266         out0[i] = out[ 0 + 31 - i];
 2267         out1[i] = out[32 + 31 - i];
 2268       }
 2269 
 2270       transpose32c (out0);
 2271       transpose32c (out1);
 2272 
 2273       #pragma unroll
 2274       for (int slice = 0; slice < 32; slice++)
 2275       {
              // The transposed arrays are read back in reversed order as well.
 2276         const u32 r0 = out0[31 - slice];
 2277         const u32 r1 = out1[31 - slice];
 2278         #ifdef KERNEL_STATIC
 2279         const u32 r2 = 0;
 2280         const u32 r3 = 0;
 2281         #endif
 2282 
 2283         #include COMPARE_M
 2284       }
 2285     }
 2286   }
 2287 }
 2288 
 2289 KERNEL_FQ void m01500_sxx (KERN_ATTR_BITSLICE ())
 2290 {
        /**
         * Single-target variant of the m01500 kernel.
         *
         * Per work-item: the two key words pws[gid].i[0..1] are expanded into
         * 56 all-ones/all-zero masks (K00..K55). Per inner-loop step (il_pos
         * advances by 32): masks 0..27 are OR-combined with
         * words_buf_s[il_pos / 32].b[0..27], DESCrypt is called with the salt,
         * the 56 key values and pointers to D00..D63, and D00..D63 are then
         * compared against the masks S00..S63 built from digests_buf[0].
         * If at least one bit position matches in all 64 words, the code
         * pulled in through COMPARE_S runs (only when KERNEL_STATIC is defined).
         *
         * The parameter list comes from the KERN_ATTR_BITSLICE macro, which is
         * defined outside this part of the file.
         */

 2291   /**
 2292    * work-item identification and bounds check
 2293    */
 2294
 2295   const u64 gid = get_global_id (0);
        // NOTE(review): lid is not referenced in the visible body; it may be
        // needed by the textually included COMPARE_S code - confirm before
        // removing.
 2296   const u64 lid = get_local_id (0);
 2297
        // Work-items beyond GID_CNT have nothing to do.
 2298   if (gid >= GID_CNT) return;
 2299
 2300   /**
 2301    * salt: first salt word of the selected salt entry, handed to DESCrypt as-is
 2302    */
 2303
 2304   const u32 salt = salt_bufs[SALT_POS_HOST].salt_buf[0];
 2305
 2306   /**
 2307    * digest
 2308    *
        * The target is read from digest entry 0, words 0 and 1.
        * NOTE(review): the multi-hash kernel m01500_mxx indexes digests_buf with
        * DIGESTS_OFFSET_HOST + d and DGST_R0/DGST_R1; here index 0 and words 0/1
        * are hard-coded - presumably valid because only one digest exists in
        * single-hash mode; confirm.
        *
        * Each Snn below is all-ones (-1 stored in a u32) when bit nn of the
        * 64-bit target is set and 0 otherwise (s0 supplies bits 0..31, s1
        * supplies bits 32..63), so it can be XOR-ed against a whole output word.
        */
 2309
 2310   const u32 s0 = digests_buf[0].digest_buf[0];
 2311   const u32 s1 = digests_buf[0].digest_buf[1];
 2312
 2313   const u32 S00 = (((s0 >>  0) & 1) ? -1 : 0);
 2314   const u32 S01 = (((s0 >>  1) & 1) ? -1 : 0);
 2315   const u32 S02 = (((s0 >>  2) & 1) ? -1 : 0);
 2316   const u32 S03 = (((s0 >>  3) & 1) ? -1 : 0);
 2317   const u32 S04 = (((s0 >>  4) & 1) ? -1 : 0);
 2318   const u32 S05 = (((s0 >>  5) & 1) ? -1 : 0);
 2319   const u32 S06 = (((s0 >>  6) & 1) ? -1 : 0);
 2320   const u32 S07 = (((s0 >>  7) & 1) ? -1 : 0);
 2321   const u32 S08 = (((s0 >>  8) & 1) ? -1 : 0);
 2322   const u32 S09 = (((s0 >>  9) & 1) ? -1 : 0);
 2323   const u32 S10 = (((s0 >> 10) & 1) ? -1 : 0);
 2324   const u32 S11 = (((s0 >> 11) & 1) ? -1 : 0);
 2325   const u32 S12 = (((s0 >> 12) & 1) ? -1 : 0);
 2326   const u32 S13 = (((s0 >> 13) & 1) ? -1 : 0);
 2327   const u32 S14 = (((s0 >> 14) & 1) ? -1 : 0);
 2328   const u32 S15 = (((s0 >> 15) & 1) ? -1 : 0);
 2329   const u32 S16 = (((s0 >> 16) & 1) ? -1 : 0);
 2330   const u32 S17 = (((s0 >> 17) & 1) ? -1 : 0);
 2331   const u32 S18 = (((s0 >> 18) & 1) ? -1 : 0);
 2332   const u32 S19 = (((s0 >> 19) & 1) ? -1 : 0);
 2333   const u32 S20 = (((s0 >> 20) & 1) ? -1 : 0);
 2334   const u32 S21 = (((s0 >> 21) & 1) ? -1 : 0);
 2335   const u32 S22 = (((s0 >> 22) & 1) ? -1 : 0);
 2336   const u32 S23 = (((s0 >> 23) & 1) ? -1 : 0);
 2337   const u32 S24 = (((s0 >> 24) & 1) ? -1 : 0);
 2338   const u32 S25 = (((s0 >> 25) & 1) ? -1 : 0);
 2339   const u32 S26 = (((s0 >> 26) & 1) ? -1 : 0);
 2340   const u32 S27 = (((s0 >> 27) & 1) ? -1 : 0);
 2341   const u32 S28 = (((s0 >> 28) & 1) ? -1 : 0);
 2342   const u32 S29 = (((s0 >> 29) & 1) ? -1 : 0);
 2343   const u32 S30 = (((s0 >> 30) & 1) ? -1 : 0);
 2344   const u32 S31 = (((s0 >> 31) & 1) ? -1 : 0);
 2345   const u32 S32 = (((s1 >>  0) & 1) ? -1 : 0);
 2346   const u32 S33 = (((s1 >>  1) & 1) ? -1 : 0);
 2347   const u32 S34 = (((s1 >>  2) & 1) ? -1 : 0);
 2348   const u32 S35 = (((s1 >>  3) & 1) ? -1 : 0);
 2349   const u32 S36 = (((s1 >>  4) & 1) ? -1 : 0);
 2350   const u32 S37 = (((s1 >>  5) & 1) ? -1 : 0);
 2351   const u32 S38 = (((s1 >>  6) & 1) ? -1 : 0);
 2352   const u32 S39 = (((s1 >>  7) & 1) ? -1 : 0);
 2353   const u32 S40 = (((s1 >>  8) & 1) ? -1 : 0);
 2354   const u32 S41 = (((s1 >>  9) & 1) ? -1 : 0);
 2355   const u32 S42 = (((s1 >> 10) & 1) ? -1 : 0);
 2356   const u32 S43 = (((s1 >> 11) & 1) ? -1 : 0);
 2357   const u32 S44 = (((s1 >> 12) & 1) ? -1 : 0);
 2358   const u32 S45 = (((s1 >> 13) & 1) ? -1 : 0);
 2359   const u32 S46 = (((s1 >> 14) & 1) ? -1 : 0);
 2360   const u32 S47 = (((s1 >> 15) & 1) ? -1 : 0);
 2361   const u32 S48 = (((s1 >> 16) & 1) ? -1 : 0);
 2362   const u32 S49 = (((s1 >> 17) & 1) ? -1 : 0);
 2363   const u32 S50 = (((s1 >> 18) & 1) ? -1 : 0);
 2364   const u32 S51 = (((s1 >> 19) & 1) ? -1 : 0);
 2365   const u32 S52 = (((s1 >> 20) & 1) ? -1 : 0);
 2366   const u32 S53 = (((s1 >> 21) & 1) ? -1 : 0);
 2367   const u32 S54 = (((s1 >> 22) & 1) ? -1 : 0);
 2368   const u32 S55 = (((s1 >> 23) & 1) ? -1 : 0);
 2369   const u32 S56 = (((s1 >> 24) & 1) ? -1 : 0);
 2370   const u32 S57 = (((s1 >> 25) & 1) ? -1 : 0);
 2371   const u32 S58 = (((s1 >> 26) & 1) ? -1 : 0);
 2372   const u32 S59 = (((s1 >> 27) & 1) ? -1 : 0);
 2373   const u32 S60 = (((s1 >> 28) & 1) ? -1 : 0);
 2374   const u32 S61 = (((s1 >> 29) & 1) ? -1 : 0);
 2375   const u32 S62 = (((s1 >> 30) & 1) ? -1 : 0);
 2376   const u32 S63 = (((s1 >> 31) & 1) ? -1 : 0);
 2377
 2378   /**
 2379    * key material: the two key words of this work-item
 2380    */
 2381
 2382   const u32 w0 = pws[gid].i[0];
 2383   const u32 w1 = pws[gid].i[1];
 2384
        // Shift left by one and clear bit 0 of every byte (mask 0xfefefefe):
        // bits 7..1 of each byte of w0s/w1s now hold bits 6..0 of the matching
        // byte of w0/w1, and bit 7 of each original byte is discarded.
        // Presumably this matches descrypt using only the low 7 bits of each
        // password character - confirm.
 2385   const u32 w0s = (w0 << 1) & 0xfefefefe;
 2386   const u32 w1s = (w1 << 1) & 0xfefefefe;
 2387
        // K00..K55: 56 key-bit masks, 7 per byte, taken from bit 7 down to bit 1
        // of each byte, lowest byte first. K00..K27 come from w0s, K28..K55 from
        // w1s. Each expands to -1 when the selected bit is set and to 0 otherwise.
        // These are macros: they are re-evaluated at every use and are not
        // #undef'd in this function.
 2388   #define K00 (((w0s >> ( 0 + 7)) & 1) ? -1 : 0)
 2389   #define K01 (((w0s >> ( 0 + 6)) & 1) ? -1 : 0)
 2390   #define K02 (((w0s >> ( 0 + 5)) & 1) ? -1 : 0)
 2391   #define K03 (((w0s >> ( 0 + 4)) & 1) ? -1 : 0)
 2392   #define K04 (((w0s >> ( 0 + 3)) & 1) ? -1 : 0)
 2393   #define K05 (((w0s >> ( 0 + 2)) & 1) ? -1 : 0)
 2394   #define K06 (((w0s >> ( 0 + 1)) & 1) ? -1 : 0)
 2395   #define K07 (((w0s >> ( 8 + 7)) & 1) ? -1 : 0)
 2396   #define K08 (((w0s >> ( 8 + 6)) & 1) ? -1 : 0)
 2397   #define K09 (((w0s >> ( 8 + 5)) & 1) ? -1 : 0)
 2398   #define K10 (((w0s >> ( 8 + 4)) & 1) ? -1 : 0)
 2399   #define K11 (((w0s >> ( 8 + 3)) & 1) ? -1 : 0)
 2400   #define K12 (((w0s >> ( 8 + 2)) & 1) ? -1 : 0)
 2401   #define K13 (((w0s >> ( 8 + 1)) & 1) ? -1 : 0)
 2402   #define K14 (((w0s >> (16 + 7)) & 1) ? -1 : 0)
 2403   #define K15 (((w0s >> (16 + 6)) & 1) ? -1 : 0)
 2404   #define K16 (((w0s >> (16 + 5)) & 1) ? -1 : 0)
 2405   #define K17 (((w0s >> (16 + 4)) & 1) ? -1 : 0)
 2406   #define K18 (((w0s >> (16 + 3)) & 1) ? -1 : 0)
 2407   #define K19 (((w0s >> (16 + 2)) & 1) ? -1 : 0)
 2408   #define K20 (((w0s >> (16 + 1)) & 1) ? -1 : 0)
 2409   #define K21 (((w0s >> (24 + 7)) & 1) ? -1 : 0)
 2410   #define K22 (((w0s >> (24 + 6)) & 1) ? -1 : 0)
 2411   #define K23 (((w0s >> (24 + 5)) & 1) ? -1 : 0)
 2412   #define K24 (((w0s >> (24 + 4)) & 1) ? -1 : 0)
 2413   #define K25 (((w0s >> (24 + 3)) & 1) ? -1 : 0)
 2414   #define K26 (((w0s >> (24 + 2)) & 1) ? -1 : 0)
 2415   #define K27 (((w0s >> (24 + 1)) & 1) ? -1 : 0)
 2416   #define K28 (((w1s >> ( 0 + 7)) & 1) ? -1 : 0)
 2417   #define K29 (((w1s >> ( 0 + 6)) & 1) ? -1 : 0)
 2418   #define K30 (((w1s >> ( 0 + 5)) & 1) ? -1 : 0)
 2419   #define K31 (((w1s >> ( 0 + 4)) & 1) ? -1 : 0)
 2420   #define K32 (((w1s >> ( 0 + 3)) & 1) ? -1 : 0)
 2421   #define K33 (((w1s >> ( 0 + 2)) & 1) ? -1 : 0)
 2422   #define K34 (((w1s >> ( 0 + 1)) & 1) ? -1 : 0)
 2423   #define K35 (((w1s >> ( 8 + 7)) & 1) ? -1 : 0)
 2424   #define K36 (((w1s >> ( 8 + 6)) & 1) ? -1 : 0)
 2425   #define K37 (((w1s >> ( 8 + 5)) & 1) ? -1 : 0)
 2426   #define K38 (((w1s >> ( 8 + 4)) & 1) ? -1 : 0)
 2427   #define K39 (((w1s >> ( 8 + 3)) & 1) ? -1 : 0)
 2428   #define K40 (((w1s >> ( 8 + 2)) & 1) ? -1 : 0)
 2429   #define K41 (((w1s >> ( 8 + 1)) & 1) ? -1 : 0)
 2430   #define K42 (((w1s >> (16 + 7)) & 1) ? -1 : 0)
 2431   #define K43 (((w1s >> (16 + 6)) & 1) ? -1 : 0)
 2432   #define K44 (((w1s >> (16 + 5)) & 1) ? -1 : 0)
 2433   #define K45 (((w1s >> (16 + 4)) & 1) ? -1 : 0)
 2434   #define K46 (((w1s >> (16 + 3)) & 1) ? -1 : 0)
 2435   #define K47 (((w1s >> (16 + 2)) & 1) ? -1 : 0)
 2436   #define K48 (((w1s >> (16 + 1)) & 1) ? -1 : 0)
 2437   #define K49 (((w1s >> (24 + 7)) & 1) ? -1 : 0)
 2438   #define K50 (((w1s >> (24 + 6)) & 1) ? -1 : 0)
 2439   #define K51 (((w1s >> (24 + 5)) & 1) ? -1 : 0)
 2440   #define K52 (((w1s >> (24 + 4)) & 1) ? -1 : 0)
 2441   #define K53 (((w1s >> (24 + 3)) & 1) ? -1 : 0)
 2442   #define K54 (((w1s >> (24 + 2)) & 1) ? -1 : 0)
 2443   #define K55 (((w1s >> (24 + 1)) & 1) ? -1 : 0)
 2444
 2445   /**
 2446    * inner loop
 2447    *
        * il_pos advances in steps of 32. Presumably each of the 32 bit positions
        * of a word carries one candidate (bitslicing) - see the slice lookup at
        * the end of the loop body.
        */
 2448
 2449   for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += 32)
 2450   {
          // Mutable copies of key masks 0..27; only these are combined with
          // per-iteration data below. K28..K55 are passed to DESCrypt directly.
 2451     u32 k00 = K00;
 2452     u32 k01 = K01;
 2453     u32 k02 = K02;
 2454     u32 k03 = K03;
 2455     u32 k04 = K04;
 2456     u32 k05 = K05;
 2457     u32 k06 = K06;
 2458     u32 k07 = K07;
 2459     u32 k08 = K08;
 2460     u32 k09 = K09;
 2461     u32 k10 = K10;
 2462     u32 k11 = K11;
 2463     u32 k12 = K12;
 2464     u32 k13 = K13;
 2465     u32 k14 = K14;
 2466     u32 k15 = K15;
 2467     u32 k16 = K16;
 2468     u32 k17 = K17;
 2469     u32 k18 = K18;
 2470     u32 k19 = K19;
 2471     u32 k20 = K20;
 2472     u32 k21 = K21;
 2473     u32 k22 = K22;
 2474     u32 k23 = K23;
 2475     u32 k24 = K24;
 2476     u32 k25 = K25;
 2477     u32 k26 = K26;
 2478     u32 k27 = K27;
 2479
          // Index of the words_buf_s entry that belongs to this group of 32.
 2480     const u32 pc_pos = il_pos / 32;
 2481
          // OR the 28 words of that entry into key masks 0..27.
          // NOTE(review): presumably words_buf_s holds the varying (mask) part
          // of the candidates already in bitsliced form - confirm with the host
          // code that fills it.
 2482     k00 |= words_buf_s[pc_pos].b[ 0];
 2483     k01 |= words_buf_s[pc_pos].b[ 1];
 2484     k02 |= words_buf_s[pc_pos].b[ 2];
 2485     k03 |= words_buf_s[pc_pos].b[ 3];
 2486     k04 |= words_buf_s[pc_pos].b[ 4];
 2487     k05 |= words_buf_s[pc_pos].b[ 5];
 2488     k06 |= words_buf_s[pc_pos].b[ 6];
 2489     k07 |= words_buf_s[pc_pos].b[ 7];
 2490     k08 |= words_buf_s[pc_pos].b[ 8];
 2491     k09 |= words_buf_s[pc_pos].b[ 9];
 2492     k10 |= words_buf_s[pc_pos].b[10];
 2493     k11 |= words_buf_s[pc_pos].b[11];
 2494     k12 |= words_buf_s[pc_pos].b[12];
 2495     k13 |= words_buf_s[pc_pos].b[13];
 2496     k14 |= words_buf_s[pc_pos].b[14];
 2497     k15 |= words_buf_s[pc_pos].b[15];
 2498     k16 |= words_buf_s[pc_pos].b[16];
 2499     k17 |= words_buf_s[pc_pos].b[17];
 2500     k18 |= words_buf_s[pc_pos].b[18];
 2501     k19 |= words_buf_s[pc_pos].b[19];
 2502     k20 |= words_buf_s[pc_pos].b[20];
 2503     k21 |= words_buf_s[pc_pos].b[21];
 2504     k22 |= words_buf_s[pc_pos].b[22];
 2505     k23 |= words_buf_s[pc_pos].b[23];
 2506     k24 |= words_buf_s[pc_pos].b[24];
 2507     k25 |= words_buf_s[pc_pos].b[25];
 2508     k26 |= words_buf_s[pc_pos].b[26];
 2509     k27 |= words_buf_s[pc_pos].b[27];
 2510
          // 64 data words, all starting at zero and handed to DESCrypt by
          // pointer; they are read back for the comparison after the call.
 2511     u32 D00 = 0;
 2512     u32 D01 = 0;
 2513     u32 D02 = 0;
 2514     u32 D03 = 0;
 2515     u32 D04 = 0;
 2516     u32 D05 = 0;
 2517     u32 D06 = 0;
 2518     u32 D07 = 0;
 2519     u32 D08 = 0;
 2520     u32 D09 = 0;
 2521     u32 D10 = 0;
 2522     u32 D11 = 0;
 2523     u32 D12 = 0;
 2524     u32 D13 = 0;
 2525     u32 D14 = 0;
 2526     u32 D15 = 0;
 2527     u32 D16 = 0;
 2528     u32 D17 = 0;
 2529     u32 D18 = 0;
 2530     u32 D19 = 0;
 2531     u32 D20 = 0;
 2532     u32 D21 = 0;
 2533     u32 D22 = 0;
 2534     u32 D23 = 0;
 2535     u32 D24 = 0;
 2536     u32 D25 = 0;
 2537     u32 D26 = 0;
 2538     u32 D27 = 0;
 2539     u32 D28 = 0;
 2540     u32 D29 = 0;
 2541     u32 D30 = 0;
 2542     u32 D31 = 0;
 2543     u32 D32 = 0;
 2544     u32 D33 = 0;
 2545     u32 D34 = 0;
 2546     u32 D35 = 0;
 2547     u32 D36 = 0;
 2548     u32 D37 = 0;
 2549     u32 D38 = 0;
 2550     u32 D39 = 0;
 2551     u32 D40 = 0;
 2552     u32 D41 = 0;
 2553     u32 D42 = 0;
 2554     u32 D43 = 0;
 2555     u32 D44 = 0;
 2556     u32 D45 = 0;
 2557     u32 D46 = 0;
 2558     u32 D47 = 0;
 2559     u32 D48 = 0;
 2560     u32 D49 = 0;
 2561     u32 D50 = 0;
 2562     u32 D51 = 0;
 2563     u32 D52 = 0;
 2564     u32 D53 = 0;
 2565     u32 D54 = 0;
 2566     u32 D55 = 0;
 2567     u32 D56 = 0;
 2568     u32 D57 = 0;
 2569     u32 D58 = 0;
 2570     u32 D59 = 0;
 2571     u32 D60 = 0;
 2572     u32 D61 = 0;
 2573     u32 D62 = 0;
 2574     u32 D63 = 0;
 2575
          // Argument order: salt, the 56 key values (0..27 from the local
          // copies, 28..55 from the macros), then pointers to the 64 data words.
          // DESCrypt is defined elsewhere in this file.
 2576     DESCrypt
 2577     (
 2578       salt,
 2579       k00, k01, k02, k03, k04, k05, k06,
 2580       k07, k08, k09, k10, k11, k12, k13,
 2581       k14, k15, k16, k17, k18, k19, k20,
 2582       k21, k22, k23, k24, k25, k26, k27,
 2583       K28, K29, K30, K31, K32, K33, K34,
 2584       K35, K36, K37, K38, K39, K40, K41,
 2585       K42, K43, K44, K45, K46, K47, K48,
 2586       K49, K50, K51, K52, K53, K54, K55,
 2587       &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
 2588       &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
 2589       &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
 2590       &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
 2591       &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
 2592       &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
 2593       &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
 2594       &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
 2595     );
 2596
          // Mismatch accumulator: bit j becomes 1 as soon as any Dnn differs from
          // Snn at bit position j. A bit that is still 0 after all 64 words
          // means that position matched the target in every word.
 2597     u32 tmpResult = 0;
 2598
 2599     tmpResult |= D00 ^ S00;
 2600     tmpResult |= D01 ^ S01;
 2601     tmpResult |= D02 ^ S02;
 2602     tmpResult |= D03 ^ S03;
 2603     tmpResult |= D04 ^ S04;
 2604     tmpResult |= D05 ^ S05;
 2605     tmpResult |= D06 ^ S06;
 2606     tmpResult |= D07 ^ S07;
 2607     tmpResult |= D08 ^ S08;
 2608     tmpResult |= D09 ^ S09;
 2609     tmpResult |= D10 ^ S10;
 2610     tmpResult |= D11 ^ S11;
 2611     tmpResult |= D12 ^ S12;
 2612     tmpResult |= D13 ^ S13;
 2613     tmpResult |= D14 ^ S14;
 2614     tmpResult |= D15 ^ S15;
 2615
          // Early exit after each group of 16 words: 0xffffffff means all 32 bit
          // positions have already mismatched, so the rest need not be checked.
 2616     if (tmpResult == 0xffffffff) continue;
 2617
 2618     tmpResult |= D16 ^ S16;
 2619     tmpResult |= D17 ^ S17;
 2620     tmpResult |= D18 ^ S18;
 2621     tmpResult |= D19 ^ S19;
 2622     tmpResult |= D20 ^ S20;
 2623     tmpResult |= D21 ^ S21;
 2624     tmpResult |= D22 ^ S22;
 2625     tmpResult |= D23 ^ S23;
 2626     tmpResult |= D24 ^ S24;
 2627     tmpResult |= D25 ^ S25;
 2628     tmpResult |= D26 ^ S26;
 2629     tmpResult |= D27 ^ S27;
 2630     tmpResult |= D28 ^ S28;
 2631     tmpResult |= D29 ^ S29;
 2632     tmpResult |= D30 ^ S30;
 2633     tmpResult |= D31 ^ S31;
 2634
 2635     if (tmpResult == 0xffffffff) continue;
 2636
 2637     tmpResult |= D32 ^ S32;
 2638     tmpResult |= D33 ^ S33;
 2639     tmpResult |= D34 ^ S34;
 2640     tmpResult |= D35 ^ S35;
 2641     tmpResult |= D36 ^ S36;
 2642     tmpResult |= D37 ^ S37;
 2643     tmpResult |= D38 ^ S38;
 2644     tmpResult |= D39 ^ S39;
 2645     tmpResult |= D40 ^ S40;
 2646     tmpResult |= D41 ^ S41;
 2647     tmpResult |= D42 ^ S42;
 2648     tmpResult |= D43 ^ S43;
 2649     tmpResult |= D44 ^ S44;
 2650     tmpResult |= D45 ^ S45;
 2651     tmpResult |= D46 ^ S46;
 2652     tmpResult |= D47 ^ S47;
 2653
 2654     if (tmpResult == 0xffffffff) continue;
 2655
 2656     tmpResult |= D48 ^ S48;
 2657     tmpResult |= D49 ^ S49;
 2658     tmpResult |= D50 ^ S50;
 2659     tmpResult |= D51 ^ S51;
 2660     tmpResult |= D52 ^ S52;
 2661     tmpResult |= D53 ^ S53;
 2662     tmpResult |= D54 ^ S54;
 2663     tmpResult |= D55 ^ S55;
 2664     tmpResult |= D56 ^ S56;
 2665     tmpResult |= D57 ^ S57;
 2666     tmpResult |= D58 ^ S58;
 2667     tmpResult |= D59 ^ S59;
 2668     tmpResult |= D60 ^ S60;
 2669     tmpResult |= D61 ^ S61;
 2670     tmpResult |= D62 ^ S62;
 2671     tmpResult |= D63 ^ S63;
 2672
 2673     if (tmpResult == 0xffffffff) continue;
 2674
          // At least one bit of tmpResult is 0 here. ffz presumably returns the
          // index of the first zero bit, i.e. the matching bit position - confirm
          // against its definition. Only that one position is reported.
 2675     const u32 slice = ffz (tmpResult);
 2676
          // COMPARE_S is #defined at the top of the file as
          // M2S(INCLUDE_PATH/inc_comp_single_bs.cl). It is included textually, so
          // it can refer to locals of this function such as slice, il_pos and
          // gid by name; which ones it uses is not visible here, so do not
          // rename locals without checking that file.
 2677     #ifdef KERNEL_STATIC
 2678     #include COMPARE_S
 2679     #endif
 2680   }
 2681 }