"Fossies" - the Fresh Open Source Software Archive

Member "john-1.9.0/src/DES_bs_b.c" (10 Apr 2019, 25542 Bytes) of package /linux/privat/john-1.9.0.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "DES_bs_b.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 1.8.0_vs_1.9.0.

    1 /*
    2  * This file is part of John the Ripper password cracker,
    3  * Copyright (c) 1996-2001,2003,2010-2013,2015,2019 by Solar Designer
    4  */
    5 
    6 #include "arch.h"
    7 #include "common.h"
    8 #include "DES_bs.h"
    9 
   10 #if DES_BS_ASM && defined(_OPENMP) && defined(__GNUC__)
   11 #warning Assembly code and OpenMP are both requested - will provide the former, but not the latter (for DES-based hashes).  This may likely be corrected by enabling SIMD intrinsics with the C compiler (try adding -msse2 to OMPFLAGS).
   12 #endif
   13 
   14 #if !DES_BS_ASM
      /*
       * The code from here to the matching #endif is the C implementation,
       * compiled only when DES_BS_ASM is zero.
       */
   15 
      /*
       * vzero and vones reinterpret the zero and ones members of DES_bs_all as
       * vtype.  With DES_bs_mt, ones is read through DES_bs_all_by_tnum(-1).
       * The generic word-based branch further down may replace both macros
       * with immediate values.
       */
   16 #define vzero (*(vtype *)&DES_bs_all.zero)
   17 #if DES_bs_mt
   18 #define vones (*(vtype *)&DES_bs_all_by_tnum(-1).ones)
   19 #else
   20 #define vones (*(vtype *)&DES_bs_all.ones)
   21 #endif
   22 
      /*
       * Default: no per-depth loops around the S-box calls.  Only the generic
       * word-based branch further down changes this to 1.
       */
   23 #define DES_BS_VECTOR_LOOPS 0
   24 
   25 #if (defined(__ARM_NEON) || defined(__aarch64__)) && DES_BS_DEPTH == 128
      /*
       * ARM NEON, 128-bit depth: vtype is uint32x4_t and each operation below
       * expands to one 128-bit ("q") u32 intrinsic.
       */
   26 #include <arm_neon.h>
   27 
   28 typedef uint32x4_t vtype;
   29 
      /* Store src to the ofs-th DES_bs_vector counted from the address of dst. */
   30 #define vst(dst, ofs, src) \
   31     vst1q_u32((uint32_t *)((DES_bs_vector *)&(dst) + (ofs)), (src))
   32 
      /* XOR in expression form; the statement form vxor() is derived later. */
   33 #define vxorf(a, b) \
   34     veorq_u32((a), (b))
   35 
   36 #define vnot(dst, a) \
   37     (dst) = vmvnq_u32((a))
   38 #define vand(dst, a, b) \
   39     (dst) = vandq_u32((a), (b))
   40 #define vor(dst, a, b) \
   41     (dst) = vorrq_u32((a), (b))
   42 #define vandn(dst, a, b) \
   43     (dst) = vbicq_u32((a), (b))
      /* Note the operand order: the selector c is passed first and a last. */
   44 #define vsel(dst, a, b, c) \
   45     (dst) = vbslq_u32((c), (b), (a))
   46 
      /*
       * Disabled add-based shift by one.  With it compiled out, vshl1 is
       * defined further down in terms of vshl(dst, src, 1).
       */
   47 #if 0
   48 #define vshl1(dst, src) \
   49     (dst) = vaddq_u32((src), (src))
   50 #endif
   51 #define vshl(dst, src, shift) \
   52     (dst) = vshlq_n_u32((src), (shift))
   53 #define vshr(dst, src, shift) \
   54     (dst) = vshrq_n_u32((src), (shift))
   55 
   56 #elif (defined(__ARM_NEON) || defined(__aarch64__)) && DES_BS_DEPTH == 64 && DES_BS_VECTOR > 0
      /*
       * ARM NEON, 64-bit depth with DES_BS_VECTOR > 0: vtype is uint32x2_t and
       * each operation below expands to one 64-bit u32 intrinsic.
       */
   57 #include <arm_neon.h>
   58 
   59 typedef uint32x2_t vtype;
   60 
      /* Store src to the ofs-th DES_bs_vector counted from the address of dst. */
   61 #define vst(dst, ofs, src) \
   62     vst1_u32((uint32_t *)((DES_bs_vector *)&(dst) + (ofs)), (src))
   63 
      /* XOR in expression form; the statement form vxor() is derived later. */
   64 #define vxorf(a, b) \
   65     veor_u32((a), (b))
   66 
   67 #define vnot(dst, a) \
   68     (dst) = vmvn_u32((a))
   69 #define vand(dst, a, b) \
   70     (dst) = vand_u32((a), (b))
   71 #define vor(dst, a, b) \
   72     (dst) = vorr_u32((a), (b))
   73 #define vandn(dst, a, b) \
   74     (dst) = vbic_u32((a), (b))
      /* Note the operand order: the selector c is passed first and a last. */
   75 #define vsel(dst, a, b, c) \
   76     (dst) = vbsl_u32((c), (b), (a))
   77 
      /*
       * Disabled add-based shift by one.  With it compiled out, vshl1 is
       * defined further down in terms of vshl(dst, src, 1).
       */
   78 #if 0
   79 #define vshl1(dst, src) \
   80     (dst) = vadd_u32((src), (src))
   81 #endif
   82 #define vshl(dst, src, shift) \
   83     (dst) = vshl_n_u32((src), (shift))
   84 #define vshr(dst, src, shift) \
   85     (dst) = vshr_n_u32((src), (shift))
   86 
   87 #elif defined(__ALTIVEC__) && DES_BS_DEPTH == 128
      /*
       * AltiVec, 128-bit depth.  This branch defines no vshl/vshr, so the key
       * finalization code further down falls back to plain unsigned ARCH_WORD
       * operations (see the kv* definitions).
       */
   88 #ifdef __linux__
   89 #include <altivec.h>
   90 #endif
   91 
   92 typedef vector signed int vtype;
   93 
      /*
       * The offset is passed in bytes.  Unlike the other branches, dst itself
       * (not &(dst)) is cast to a pointer - presumably this relies on dst being
       * an array that decays to a pointer; confirm against DES_bs.h.
       */
   94 #define vst(dst, ofs, src) \
   95     vec_st((src), (ofs) * sizeof(DES_bs_vector), (vtype *)(dst))
   96 
      /* XOR in expression form; the statement form vxor() is derived later. */
   97 #define vxorf(a, b) \
   98     vec_xor((a), (b))
   99 
      /* NOT is expressed as NOR of the operand with itself. */
  100 #define vnot(dst, a) \
  101     (dst) = vec_nor((a), (a))
  102 #define vand(dst, a, b) \
  103     (dst) = vec_and((a), (b))
  104 #define vor(dst, a, b) \
  105     (dst) = vec_or((a), (b))
  106 #define vandn(dst, a, b) \
  107     (dst) = vec_andc((a), (b))
  108 #define vsel(dst, a, b, c) \
  109     (dst) = vec_sel((a), (b), (vector bool int)(c))
  110 
  111 #elif (defined(__MIC__) || defined(__AVX512F__)) && DES_BS_DEPTH == 512
      /*
       * 512-bit vectors (MIC or AVX-512F).  vnot is not defined in this branch
       * and is derived later as an XOR with vones.
       */
  112 #include <immintrin.h>
  113 
  114 typedef __m512i vtype;
  115 
      /* Store src to the ofs-th DES_bs_vector counted from the address of dst. */
  116 #define vst(dst, ofs, src) \
  117     _mm512_store_epi32((vtype *)((DES_bs_vector *)&(dst) + (ofs)), (src))
  118 
      /* XOR in expression form; the statement form vxor() is derived later. */
  119 #define vxorf(a, b) \
  120     _mm512_xor_epi32((a), (b))
  121 
  122 #define vand(dst, a, b) \
  123     (dst) = _mm512_and_epi32((a), (b))
  124 #define vor(dst, a, b) \
  125     (dst) = _mm512_or_epi32((a), (b))
      /*
       * (b) is passed first because the andnot intrinsic complements its first
       * operand; the result is a & ~b, as in the generic definition below.
       */
  126 #define vandn(dst, a, b) \
  127     (dst) = _mm512_andnot_epi32((b), (a))
  128 
      /* Shift left by one expressed as src + src. */
  129 #define vshl1(dst, src) \
  130     (dst) = _mm512_add_epi32((src), (src))
  131 #define vshl(dst, src, shift) \
  132     (dst) = _mm512_slli_epi32((src), (shift))
  133 #define vshr(dst, src, shift) \
  134     (dst) = _mm512_srli_epi32((src), (shift))
  135 
      /*
       * Only with __AVX512F__: vsel as one ternary-logic operation.  With the
       * operand order (b, a, c), truth table 0xE4 yields b where c is set and
       * a elsewhere.  vlut3 exposes the same intrinsic with a caller-supplied
       * table d.  A build that has __MIC__ but not __AVX512F__ gets neither.
       */
  136 #ifdef __AVX512F__
  137 #define vsel(dst, a, b, c) \
  138     (dst) = _mm512_ternarylogic_epi32((b), (a), (c), 0xE4)
  139 #define vlut3(a, b, c, d) \
  140     _mm512_ternarylogic_epi32((a), (b), (c), (d))
  141 #endif
  142 
  143 #elif defined(__AVX2__) && DES_BS_DEPTH == 256
      /*
       * AVX2, 256-bit depth.  Neither vnot nor vsel is defined in this branch;
       * vnot is derived later as an XOR with vones.
       */
  144 #include <immintrin.h>
  145 
  146 typedef __m256i vtype;
  147 
      /* Store src to the ofs-th DES_bs_vector counted from the address of dst. */
  148 #define vst(dst, ofs, src) \
  149     _mm256_store_si256((vtype *)((DES_bs_vector *)&(dst) + (ofs)), (src))
  150 
      /* XOR in expression form; the statement form vxor() is derived later. */
  151 #define vxorf(a, b) \
  152     _mm256_xor_si256((a), (b))
  153 
  154 #define vand(dst, a, b) \
  155     (dst) = _mm256_and_si256((a), (b))
  156 #define vor(dst, a, b) \
  157     (dst) = _mm256_or_si256((a), (b))
      /*
       * (b) is passed first because the andnot intrinsic complements its first
       * operand; the result is a & ~b, as in the generic definition below.
       */
  158 #define vandn(dst, a, b) \
  159     (dst) = _mm256_andnot_si256((b), (a))
  160 
      /*
       * Shift left by one expressed as src + src using the 8-bit add, while
       * the general shifts below use the 64-bit shift intrinsics.
       */
  161 #define vshl1(dst, src) \
  162     (dst) = _mm256_add_epi8((src), (src))
  163 #define vshl(dst, src, shift) \
  164     (dst) = _mm256_slli_epi64((src), (shift))
  165 #define vshr(dst, src, shift) \
  166     (dst) = _mm256_srli_epi64((src), (shift))
  167 
  168 #elif defined(__SSE2__) && DES_BS_DEPTH == 128
      /*
       * SSE2, 128-bit depth.  With __AVX__ the code includes immintrin.h (plus
       * x86intrin.h when __XOP__ is set); otherwise emmintrin.h.  vnot is not
       * defined in this branch and is derived later as an XOR with vones.
       */
  169 #ifdef __AVX__
  170 #include <immintrin.h>
  171 #ifdef __XOP__
  172 #include <x86intrin.h>
  173 #endif
  174 #else
  175 #include <emmintrin.h>
  176 #endif
  177 
  178 typedef __m128i vtype;
  179 
      /* Store src to the ofs-th DES_bs_vector counted from the address of dst. */
  180 #define vst(dst, ofs, src) \
  181     _mm_store_si128((vtype *)((DES_bs_vector *)&(dst) + (ofs)), (src))
  182 
      /* XOR in expression form; the statement form vxor() is derived later. */
  183 #define vxorf(a, b) \
  184     _mm_xor_si128((a), (b))
  185 
  186 #define vand(dst, a, b) \
  187     (dst) = _mm_and_si128((a), (b))
  188 #define vor(dst, a, b) \
  189     (dst) = _mm_or_si128((a), (b))
      /*
       * (b) is passed first because the andnot intrinsic complements its first
       * operand; the result is a & ~b, as in the generic definition below.
       */
  190 #define vandn(dst, a, b) \
  191     (dst) = _mm_andnot_si128((b), (a))
  192 
      /*
       * vsel: a single XOP conditional move when __XOP__ is available,
       * otherwise composed as (a & ~c) ^ (b & c).
       */
  193 #ifdef __XOP__
  194 #define vsel(dst, a, b, c) \
  195     (dst) = _mm_cmov_si128((b), (a), (c))
  196 #else
  197 #define vsel(dst, a, b, c) \
  198     (dst) = _mm_xor_si128(_mm_andnot_si128((c), (a)), \
  199         _mm_and_si128((c), (b)))
  200 #endif
  201 
      /*
       * Shift left by one expressed as src + src using the 8-bit add, while
       * the general shifts below use the 64-bit shift intrinsics.
       */
  202 #define vshl1(dst, src) \
  203     (dst) = _mm_add_epi8((src), (src))
  204 #define vshl(dst, src, shift) \
  205     (dst) = _mm_slli_epi64((src), (shift))
  206 #define vshr(dst, src, shift) \
  207     (dst) = _mm_srli_epi64((src), (shift))
  208 
  209 #elif defined(__MMX__) && ARCH_BITS != 64 && DES_BS_DEPTH == 64
      /*
       * MMX, 64-bit depth, only when ARCH_BITS is not 64.  vst, vnot and vsel
       * are not defined in this branch; vst and vnot receive the fallback
       * definitions further down.
       */
  210 #include <mmintrin.h>
  211 
  212 typedef __m64 vtype;
  213 
      /* XOR in expression form; the statement form vxor() is derived later. */
  214 #define vxorf(a, b) \
  215     _mm_xor_si64((a), (b))
  216 
  217 #define vand(dst, a, b) \
  218     (dst) = _mm_and_si64((a), (b))
  219 #define vor(dst, a, b) \
  220     (dst) = _mm_or_si64((a), (b))
      /*
       * (b) is passed first because the andnot intrinsic complements its first
       * operand; the result is a & ~b, as in the generic definition below.
       */
  221 #define vandn(dst, a, b) \
  222     (dst) = _mm_andnot_si64((b), (a))
  223 
      /* Shift left by one expressed as src + src using the 8-bit add. */
  224 #define vshl1(dst, src) \
  225     (dst) = _mm_add_pi8((src), (src))
  226 #define vshl(dst, src, shift) \
  227     (dst) = _mm_slli_si64((src), (shift))
  228 #define vshr(dst, src, shift) \
  229     (dst) = _mm_srli_si64((src), (shift))
  230 
  231 #else
      /*
       * Generic fallback: vtype is one unsigned ARCH_WORD and every operation
       * is a plain C operator.
       */
  232 
      /*
       * With a non-zero DES_BS_VECTOR the code that uses these operations must
       * loop over the words itself, so DES_BS_VECTOR_LOOPS becomes 1.
       */
  233 #if DES_BS_VECTOR
  234 #undef DES_BS_VECTOR_LOOPS
  235 #define DES_BS_VECTOR_LOOPS 1
  236 #endif
  237 
  238 typedef unsigned ARCH_WORD vtype;
  239 
  240 #define vxorf(a, b) \
  241     ((a) ^ (b))
  242 
  243 #define vnot(dst, a) \
  244     (dst) = ~(a)
  245 #define vand(dst, a, b) \
  246     (dst) = (a) & (b)
  247 #define vor(dst, a, b) \
  248     (dst) = (a) | (b)
      /* a with the bits of b cleared. */
  249 #define vandn(dst, a, b) \
  250     (dst) = (a) & ~(b)
      /* Bitwise select: bits of b where c is set, bits of a elsewhere. */
  251 #define vsel(dst, a, b, c) \
  252     (dst) = (((a) & ~(c)) ^ ((b) & (c)))
  253 
  254 #define vshl(dst, src, shift) \
  255     (dst) = (src) << (shift)
  256 #define vshr(dst, src, shift) \
  257     (dst) = (src) >> (shift)
  258 
  259 /* Assume that 0 always fits in one load immediate instruction */
  260 #undef vzero
  261 #define vzero 0
  262 
  263 /* Archs friendly to use of immediate values */
  264 #if defined(__x86_64__) || defined(__i386__)
  265 #undef vones
  266 #define vones (~(vtype)0)
  267 #endif
  268 
  269 #endif
  270 
      /*
       * Fallback definitions for operations that the branch selected above did
       * not provide.
       */
      /* Default store: plain assignment to the ofs-th DES_bs_vector from dst. */
  271 #ifndef vst
  272 #define vst(dst, ofs, src) \
  273     *((vtype *)((DES_bs_vector *)&(dst) + (ofs))) = (src)
  274 #endif
  275 
      /*
       * vxor (statement form) and vxorf (expression form) are derived from
       * each other, depending on which one already exists.
       */
  276 #if !defined(vxor) && defined(vxorf)
  277 #define vxor(dst, a, b) \
  278     (dst) = vxorf((a), (b))
  279 #endif
  280 #if !defined(vxorf) && defined(vxor)
  281 /*
  282  * This requires gcc's "Statement Exprs" extension (also supported by a number
  283  * of other C compilers).
  284  */
  285 #define vxorf(a, b) \
  286     ({ vtype tmp; vxor(tmp, (a), (b)); tmp; })
  287 #endif
  288 
      /* Default NOT: XOR with the all-ones constant vones. */
  289 #ifndef vnot
  290 #define vnot(dst, a) \
  291     vxor((dst), (a), vones)
  292 #endif
  293 
      /* Default shift-left-by-one: the general shift with a count of 1. */
  294 #ifndef vshl1
  295 #define vshl1(dst, src) \
  296     vshl((dst), (src), 1)
  297 #endif
  298 
      /*
       * Operations used by the key finalization code (kvtype, kvand, kvor,
       * kvshl1, kvshl, kvshr).
       *
       * First branch: no per-depth loops and the selected vtype has both
       * shifts, so the kv* names are simple aliases of the v* ones, DEPTH_K
       * expands to nothing and for_each_depth_k() is empty.
       */
  299 #if !DES_BS_VECTOR_LOOPS && defined(vshl) && defined(vshr)
  300 #define DES_BS_VECTOR_LOOPS_K 0
  301 #define DEPTH_K
  302 #define for_each_depth_k()
  303 
  304 #define kvtype vtype
  305 #define kvand vand
  306 #define kvor vor
  307 #define kvshl1 vshl1
  308 #define kvshl vshl
  309 #define kvshr vshr
  310 #else
      /*
       * Second branch: the key code works on plain unsigned ARCH_WORD values.
       * With a non-zero DES_BS_VECTOR it loops over depth, indexing each
       * vector with [depth]; the loop variable depth must be declared by the
       * function that uses for_each_depth_k().
       */
  311 #if DES_BS_VECTOR
  312 #define DES_BS_VECTOR_LOOPS_K 1
  313 #define DEPTH_K             [depth]
  314 #define for_each_depth_k() \
  315     for (depth = 0; depth < DES_BS_VECTOR; depth++)
      /*
       * NOTE(review): this inner #else defines neither DEPTH_K nor
       * for_each_depth_k(), although the finalize functions use both.
       * Presumably it is never reached with the branches above - confirm.
       */
  316 #else
  317 #define DES_BS_VECTOR_LOOPS_K 0
  318 #endif
  319 
  320 typedef unsigned ARCH_WORD kvtype;
  321 #define kvand(dst, a, b) \
  322     (dst) = (a) & (b)
  323 #define kvor(dst, a, b) \
  324     (dst) = (a) | (b)
  325 #define kvshl1(dst, src) \
  326     (dst) = (src) << 1
  327 #define kvshl(dst, src, shift) \
  328     (dst) = (src) << (shift)
  329 #define kvshr(dst, src, shift) \
  330     (dst) = (src) >> (shift)
  331 #endif
  332 
      /*
       * mask01..mask80 are the masks used by FINALIZE_NEXT_KEY_BIT_0..7.
       *
       * When the key code works on plain words (!DES_BS_VECTOR or
       * DES_BS_VECTOR_LOOPS_K) on x86-64 or i386, they are immediate
       * constants: mask01 has bit 0 of every byte set and the others are
       * mask01 shifted left by 1..7.  On other architectures mask01 is left
       * undefined here so that the memory-based definitions below apply.
       */
  333 #if !DES_BS_VECTOR || DES_BS_VECTOR_LOOPS_K
  334 #ifdef __x86_64__
  335 #define mask01 0x0101010101010101UL
  336 #elif __i386__
  337 #define mask01 0x01010101UL
  338 #else
  339 #undef mask01
  340 #endif
  341 #ifdef mask01
  342 #define mask02 (mask01 << 1)
  343 #define mask04 (mask01 << 2)
  344 #define mask08 (mask01 << 3)
  345 #define mask10 (mask01 << 4)
  346 #define mask20 (mask01 << 5)
  347 #define mask40 (mask01 << 6)
  348 #define mask80 (mask01 << 7)
  349 #endif
  350 #endif
  351 
      /*
       * Otherwise the masks are read as kvtype from DES_bs_all.masks[0..7]
       * (presumably initialized elsewhere with the same bit patterns -
       * confirm against the initialization code).
       */
  352 #ifndef mask01
  353 #define mask01 (*(kvtype *)&DES_bs_all.masks[0])
  354 #define mask02 (*(kvtype *)&DES_bs_all.masks[1])
  355 #define mask04 (*(kvtype *)&DES_bs_all.masks[2])
  356 #define mask08 (*(kvtype *)&DES_bs_all.masks[3])
  357 #define mask10 (*(kvtype *)&DES_bs_all.masks[4])
  358 #define mask20 (*(kvtype *)&DES_bs_all.masks[5])
  359 #define mask40 (*(kvtype *)&DES_bs_all.masks[6])
  360 #define mask80 (*(kvtype *)&DES_bs_all.masks[7])
  361 #endif
  362 
      /*
       * LOAD_V makes the eight source vectors vp[0..7] available as v0..v7
       * for the FINALIZE_NEXT_KEY_BIT_* macros; vp must be in scope where it
       * is expanded.
       *
       * On i386 only v0 and v4 are loaded into locals.  v1, v2, v3, v5, v6 and
       * v7 are macros that re-read vp[] on every use; they are #undef'd again
       * after DES_bs_finalize_keys_LM.
       */
  363 #ifdef __i386__
  364 /* register-starved */
  365 #define LOAD_V \
  366     kvtype v0 = *(kvtype *)&vp[0]; \
  367     kvtype v4 = *(kvtype *)&vp[4];
  368 #define v1 *(kvtype *)&vp[1]
  369 #define v2 *(kvtype *)&vp[2]
  370 #define v3 *(kvtype *)&vp[3]
  371 #define v5 *(kvtype *)&vp[5]
  372 #define v6 *(kvtype *)&vp[6]
  373 #define v7 *(kvtype *)&vp[7]
  374 #else
  375 #define LOAD_V \
  376     kvtype v0 = *(kvtype *)&vp[0]; \
  377     kvtype v1 = *(kvtype *)&vp[1]; \
  378     kvtype v2 = *(kvtype *)&vp[2]; \
  379     kvtype v3 = *(kvtype *)&vp[3]; \
  380     kvtype v4 = *(kvtype *)&vp[4]; \
  381     kvtype v5 = *(kvtype *)&vp[5]; \
  382     kvtype v6 = *(kvtype *)&vp[6]; \
  383     kvtype v7 = *(kvtype *)&vp[7];
  384 #endif
  385 
      /*
       * Building blocks for the FINALIZE_NEXT_KEY_BIT_* macros.  Each one
       * computes src & mask into a scratch variable named tmp, which must be
       * declared where the macro is expanded, and then:
       *   kvand_shl1_or   dst |= tmp << 1
       *   kvand_shl_or    dst |= tmp << shift
       *   kvand_shl1      dst  = tmp << 1
       *   kvand_or        dst |= tmp
       *   kvand_shr_or    dst |= tmp >> shift
       *   kvand_shr       dst  = tmp >> shift
       * They expand to several unbraced statements, so they are only safe as
       * complete statements inside a block.
       */
  386 #define kvand_shl1_or(dst, src, mask) \
  387     kvand(tmp, src, mask); \
  388     kvshl1(tmp, tmp); \
  389     kvor(dst, dst, tmp)
  390 
  391 #define kvand_shl_or(dst, src, mask, shift) \
  392     kvand(tmp, src, mask); \
  393     kvshl(tmp, tmp, shift); \
  394     kvor(dst, dst, tmp)
  395 
  396 #define kvand_shl1(dst, src, mask) \
  397     kvand(tmp, src, mask); \
  398     kvshl1(dst, tmp)
  399 
  400 #define kvand_or(dst, src, mask) \
  401     kvand(tmp, src, mask); \
  402     kvor(dst, dst, tmp)
  403 
  404 #define kvand_shr_or(dst, src, mask, shift) \
  405     kvand(tmp, src, mask); \
  406     kvshr(tmp, tmp, shift); \
  407     kvor(dst, dst, tmp)
  408 
  409 #define kvand_shr(dst, src, mask, shift) \
  410     kvand(tmp, src, mask); \
  411     kvshr(dst, tmp, shift)
  412 
      /*
       * FINALIZE_NEXT_KEY_BIT_n (n = 0..7) builds one output key vector,
       * stores it at *kp and advances kp.
       *
       * Each macro masks v0..v7 with the mask for bit n (mask01 for n = 0 up
       * to mask80 for n = 7) and shifts the value taken from vj by (j - n):
       * left when j > n, right when j < n, not at all when j == n.  The bit
       * taken from vj therefore ends up at bit position j.  Even-numbered
       * sources accumulate in va and odd-numbered ones in vb; the final store
       * ORs the two.
       *
       * Each macro is a braced block declaring its own m, va, vb and tmp, and
       * expects v0..v7 (see LOAD_V) and kp to be in scope where it is used.
       */
  413 #define FINALIZE_NEXT_KEY_BIT_0 { \
  414     kvtype m = mask01, va, vb, tmp; \
  415     kvand(va, v0, m); \
  416     kvand_shl1(vb, v1, m); \
  417     kvand_shl_or(va, v2, m, 2); \
  418     kvand_shl_or(vb, v3, m, 3); \
  419     kvand_shl_or(va, v4, m, 4); \
  420     kvand_shl_or(vb, v5, m, 5); \
  421     kvand_shl_or(va, v6, m, 6); \
  422     kvand_shl_or(vb, v7, m, 7); \
  423     kvor(*(kvtype *)kp, va, vb); \
  424     kp++; \
  425 }
  426 
  427 #define FINALIZE_NEXT_KEY_BIT_1 { \
  428     kvtype m = mask02, va, vb, tmp; \
  429     kvand_shr(va, v0, m, 1); \
  430     kvand(vb, v1, m); \
  431     kvand_shl1_or(va, v2, m); \
  432     kvand_shl_or(vb, v3, m, 2); \
  433     kvand_shl_or(va, v4, m, 3); \
  434     kvand_shl_or(vb, v5, m, 4); \
  435     kvand_shl_or(va, v6, m, 5); \
  436     kvand_shl_or(vb, v7, m, 6); \
  437     kvor(*(kvtype *)kp, va, vb); \
  438     kp++; \
  439 }
  440 
  441 #define FINALIZE_NEXT_KEY_BIT_2 { \
  442     kvtype m = mask04, va, vb, tmp; \
  443     kvand_shr(va, v0, m, 2); \
  444     kvand_shr(vb, v1, m, 1); \
  445     kvand_or(va, v2, m); \
  446     kvand_shl1_or(vb, v3, m); \
  447     kvand_shl_or(va, v4, m, 2); \
  448     kvand_shl_or(vb, v5, m, 3); \
  449     kvand_shl_or(va, v6, m, 4); \
  450     kvand_shl_or(vb, v7, m, 5); \
  451     kvor(*(kvtype *)kp, va, vb); \
  452     kp++; \
  453 }
  454 
  455 #define FINALIZE_NEXT_KEY_BIT_3 { \
  456     kvtype m = mask08, va, vb, tmp; \
  457     kvand_shr(va, v0, m, 3); \
  458     kvand_shr(vb, v1, m, 2); \
  459     kvand_shr_or(va, v2, m, 1); \
  460     kvand_or(vb, v3, m); \
  461     kvand_shl1_or(va, v4, m); \
  462     kvand_shl_or(vb, v5, m, 2); \
  463     kvand_shl_or(va, v6, m, 3); \
  464     kvand_shl_or(vb, v7, m, 4); \
  465     kvor(*(kvtype *)kp, va, vb); \
  466     kp++; \
  467 }
  468 
  469 #define FINALIZE_NEXT_KEY_BIT_4 { \
  470     kvtype m = mask10, va, vb, tmp; \
  471     kvand_shr(va, v0, m, 4); \
  472     kvand_shr(vb, v1, m, 3); \
  473     kvand_shr_or(va, v2, m, 2); \
  474     kvand_shr_or(vb, v3, m, 1); \
  475     kvand_or(va, v4, m); \
  476     kvand_shl1_or(vb, v5, m); \
  477     kvand_shl_or(va, v6, m, 2); \
  478     kvand_shl_or(vb, v7, m, 3); \
  479     kvor(*(kvtype *)kp, va, vb); \
  480     kp++; \
  481 }
  482 
  483 #define FINALIZE_NEXT_KEY_BIT_5 { \
  484     kvtype m = mask20, va, vb, tmp; \
  485     kvand_shr(va, v0, m, 5); \
  486     kvand_shr(vb, v1, m, 4); \
  487     kvand_shr_or(va, v2, m, 3); \
  488     kvand_shr_or(vb, v3, m, 2); \
  489     kvand_shr_or(va, v4, m, 1); \
  490     kvand_or(vb, v5, m); \
  491     kvand_shl1_or(va, v6, m); \
  492     kvand_shl_or(vb, v7, m, 2); \
  493     kvor(*(kvtype *)kp, va, vb); \
  494     kp++; \
  495 }
  496 
  497 #define FINALIZE_NEXT_KEY_BIT_6 { \
  498     kvtype m = mask40, va, vb, tmp; \
  499     kvand_shr(va, v0, m, 6); \
  500     kvand_shr(vb, v1, m, 5); \
  501     kvand_shr_or(va, v2, m, 4); \
  502     kvand_shr_or(vb, v3, m, 3); \
  503     kvand_shr_or(va, v4, m, 2); \
  504     kvand_shr_or(vb, v5, m, 1); \
  505     kvand_or(va, v6, m); \
  506     kvand_shl1_or(vb, v7, m); \
  507     kvor(*(kvtype *)kp, va, vb); \
  508     kp++; \
  509 }
  510 
  511 #define FINALIZE_NEXT_KEY_BIT_7 { \
  512     kvtype m = mask80, va, vb, tmp; \
  513     kvand_shr(va, v0, m, 7); \
  514     kvand_shr(vb, v1, m, 6); \
  515     kvand_shr_or(va, v2, m, 5); \
  516     kvand_shr_or(vb, v3, m, 4); \
  517     kvand_shr_or(va, v4, m, 3); \
  518     kvand_shr_or(vb, v5, m, 2); \
  519     kvand_shr_or(va, v6, m, 1); \
  520     kvand_or(vb, v7, m); \
  521     kvor(*(kvtype *)kp, va, vb); \
  522     kp++; \
  523 }
  524 
      /*
       * Convert the keys held in DES_bs_all.xkeys into the key vectors written
       * through kp, starting at DES_bs_all.K[0].
       *
       * For each of the 8 values of ic, LOAD_V reads eight vectors from
       * xkeys.v[ic] and FINALIZE_NEXT_KEY_BIT_0..6 emit one output vector per
       * bit position 0..6 (bit 7 is not used here), i.e. 8 * 7 = 56 vectors.
       * With DES_BS_VECTOR_LOOPS_K the whole conversion is repeated for every
       * depth index.
       *
       * When DES_BS_EXPAND is set, the vector that each of the 0x300 entries
       * of DES_bs_all.KSp[] points to is then copied into DES_bs_all.KS.v[].
       *
       * t (DES_bs_mt builds only) is not referenced by name in this body;
       * presumably the DES_bs_all macro uses it to select the per-thread
       * instance - confirm against DES_bs.h.
       */
  525 #if DES_bs_mt
  526 static MAYBE_INLINE void DES_bs_finalize_keys(int t)
  527 #else
  528 static MAYBE_INLINE void DES_bs_finalize_keys(void)
  529 #endif
  530 {
  531 #if DES_BS_VECTOR_LOOPS_K
  532     int depth;
  533 #endif
  534 
  535     for_each_depth_k() {
  536         DES_bs_vector *kp = (DES_bs_vector *)&DES_bs_all.K[0] DEPTH_K;
  537         int ic;
  538         for (ic = 0; ic < 8; ic++) {
  539             DES_bs_vector *vp =
  540                 (DES_bs_vector *)&DES_bs_all.xkeys.v[ic][0] DEPTH_K;
  541             LOAD_V
  542             FINALIZE_NEXT_KEY_BIT_0
  543             FINALIZE_NEXT_KEY_BIT_1
  544             FINALIZE_NEXT_KEY_BIT_2
  545             FINALIZE_NEXT_KEY_BIT_3
  546             FINALIZE_NEXT_KEY_BIT_4
  547             FINALIZE_NEXT_KEY_BIT_5
  548             FINALIZE_NEXT_KEY_BIT_6
  549         }
  550     }
  551 
  552 #if DES_BS_EXPAND
  553     {
  554         int index;
  555         for (index = 0; index < 0x300; index++)
  556         for_each_depth_k() {
  557 #if DES_BS_VECTOR_LOOPS_K
  558             DES_bs_all.KS.v[index] DEPTH_K =
  559                 DES_bs_all.KSp[index] DEPTH_K;
  560 #else
  561             vst(*(kvtype *)&DES_bs_all.KS.v[index], 0,
  562                 *(kvtype *)DES_bs_all.KSp[index]);
  563 #endif
  564         }
  565     }
  566 #endif
  567 }
  568 
  569 #endif
  570 
      /*
       * Store the new salt in DES_bs_all.salt and update the E pointer table
       * for every salt bit that differs from the previously stored salt.
       *
       * For a changed bit dst (0..23), E.E[dst] and E.E[dst + 24] are pointed
       * at Ens[dst] and Ens[dst + 24] - the other way round when the new bit
       * is 1 - and E.E[dst + 48] / E.E[dst + 72] at the same two sources
       * advanced by 32 vectors.  The loop stops early once the remaining
       * (shifted) bits of the old and new salt are equal.
       *
       * The salt is narrowed to unsigned int on entry and only its low 24
       * bits are examined.  In DES_bs_mt builds the function is named
       * DES_bs_set_salt_for_thread and takes the thread index t, which is not
       * referenced by name in this body (presumably used by the DES_bs_all
       * macro - confirm against DES_bs.h).
       */
  571 #if DES_bs_mt
  572 MAYBE_INLINE void DES_bs_set_salt_for_thread(int t, unsigned int salt)
  573 #else
  574 void DES_bs_set_salt(ARCH_WORD salt)
  575 #endif
  576 {
  577     unsigned int new = salt;
  578     unsigned int old = DES_bs_all.salt;
  579     int dst;
  580 
  581     DES_bs_all.salt = new;
  582 
  583     for (dst = 0; dst < 24; dst++) {
          /* Bit 0 of new/old is salt bit dst; both are shifted each pass. */
  584         if ((new ^ old) & 1) {
  585             DES_bs_vector *sp1, *sp2;
  586             int src1 = dst;
  587             int src2 = dst + 24;
              /* A set salt bit exchanges the two sources. */
  588             if (new & 1) {
  589                 src1 = src2;
  590                 src2 = dst;
  591             }
  592             sp1 = DES_bs_all.Ens[src1];
  593             sp2 = DES_bs_all.Ens[src2];
  594             DES_bs_all.E.E[dst] = (ARCH_WORD *)sp1;
  595             DES_bs_all.E.E[dst + 24] = (ARCH_WORD *)sp2;
  596             DES_bs_all.E.E[dst + 48] = (ARCH_WORD *)(sp1 + 32);
  597             DES_bs_all.E.E[dst + 72] = (ARCH_WORD *)(sp2 + 32);
  598         }
  599         new >>= 1;
  600         old >>= 1;
          /* No differing bits remain above this position. */
  601         if (new == old)
  602             break;
  603     }
  604 }
  605 
      /*
       * The remaining C code is again compiled only when DES_BS_ASM is zero.
       * The S-box implementation is chosen by DES_BS: 4 selects sboxes-t.c,
       * 3 selects sboxes-s.c, 2 selects sboxes.c, and any other value selects
       * nonstd.c (after removing any existing andn macro).
       */
  606 #if !DES_BS_ASM
  607 
  608 /* Include the S-boxes here so that the compiler can inline them */
  609 #if DES_BS == 4
  610 #include "sboxes-t.c"
  611 #elif DES_BS == 3
  612 #include "sboxes-s.c"
  613 #elif DES_BS == 2
  614 #include "sboxes.c"
  615 #else
  616 #undef andn
  617 #include "nonstd.c"
  618 #endif
  619 
      /* Shorthands for the data block array and the E pointer table. */
  620 #define b               DES_bs_all.B
  621 #define e               DES_bs_all.E.E
  622 
      /*
       * Index suffixes appended to key (kd), block (bd) and E (ed) accesses.
       *
       * With DES_BS_VECTOR_LOOPS every access is indexed by the loop variable
       * depth, which the using function must declare, and for_each_depth()
       * iterates depth over 0..DES_BS_VECTOR-1.
       *
       * Otherwise there is no loop: block accesses take no suffix, E accesses
       * take [0], and key accesses take no suffix with DES_BS_EXPAND (k then
       * points at vectors) or [0] without it (k then points at pointers).
       */
  623 #if DES_BS_VECTOR_LOOPS
  624 #define kd              [depth]
  625 #define bd              [depth]
  626 #define ed              [depth]
  627 #define DEPTH               [depth]
  628 #define for_each_depth() \
  629     for (depth = 0; depth < DES_BS_VECTOR; depth++)
  630 #else
  631 #if DES_BS_EXPAND
  632 #define kd
  633 #else
  634 #define kd              [0]
  635 #endif
  636 #define bd
  637 #define ed              [0]
  638 #define DEPTH
  639 #define for_each_depth()
  640 #endif
  641 
      /*
       * Store the variable named zero, which must be in scope at the expansion
       * site, into the eight consecutive block vectors starting at b[i].
       */
  642 #define DES_bs_clear_block_8(i) \
  643     for_each_depth() { \
  644         vst(b[i] bd, 0, zero); \
  645         vst(b[i] bd, 1, zero); \
  646         vst(b[i] bd, 2, zero); \
  647         vst(b[i] bd, 3, zero); \
  648         vst(b[i] bd, 4, zero); \
  649         vst(b[i] bd, 5, zero); \
  650         vst(b[i] bd, 6, zero); \
  651         vst(b[i] bd, 7, zero); \
  652     }
  653 
      /* Clear all 64 block vectors b[0..63], eight at a time. */
  654 #define DES_bs_clear_block \
  655     DES_bs_clear_block_8(0); \
  656     DES_bs_clear_block_8(8); \
  657     DES_bs_clear_block_8(16); \
  658     DES_bs_clear_block_8(24); \
  659     DES_bs_clear_block_8(32); \
  660     DES_bs_clear_block_8(40); \
  661     DES_bs_clear_block_8(48); \
  662     DES_bs_clear_block_8(56);
  663 
      /* Store v0..v7 into the eight consecutive block vectors starting at b[i]. */
  664 #define DES_bs_set_block_8(i, v0, v1, v2, v3, v4, v5, v6, v7) \
  665     for_each_depth() { \
  666         vst(b[i] bd, 0, v0); \
  667         vst(b[i] bd, 1, v1); \
  668         vst(b[i] bd, 2, v2); \
  669         vst(b[i] bd, 3, v3); \
  670         vst(b[i] bd, 4, v4); \
  671         vst(b[i] bd, 5, v5); \
  672         vst(b[i] bd, 6, v6); \
  673         vst(b[i] bd, 7, v7); \
  674     }
  675 
      /*
       * S-box argument helpers; k (the current key schedule position) must be
       * in scope where they are used.
       *   x(p)     value reached through E pointer p, XORed with key entry p
       *   y(p, q)  block vector p, XORed with key entry q
       *   z(r)     pointer to block vector r
       */
  676 #define x(p) vxorf(*(vtype *)&e[p] ed, *(vtype *)&k[p] kd)
  677 #define y(p, q) vxorf(*(vtype *)&b[p] bd, *(vtype *)&k[q] kd)
  678 #define z(r) ((vtype *)&b[r] bd)
  679 
      /*
       * Fixed 25-iteration variant of the main loop (iterations is set to 25).
       * keys_count is used only in DES_bs_mt builds, to derive n, the number
       * of DES_BS_DEPTH-sized key groups iterated by for_each_t().
       *
       * Per group: finalize the keys first if DES_bs_all.keys_changed is set,
       * (DES_bs_mt only) apply the salt stored in DES_bs_all_by_tnum(-1), zero
       * the 64 block vectors, then run the S-box passes below.
       *
       * One pass is 8 start/swap pairs, i.e. 16 sets of eight S-box calls, and
       * walks k across 8 * 96 = 0x300 key entries.  Passes alternate between
       * entering at "start" and entering at "swap"; see the comments at the
       * loop control below.
       *
       * In this variant S-boxes 1, 2, 5 and 6 take their inputs through the E
       * pointer table (x), while S-boxes 3, 4, 7 and 8 read the block vectors
       * directly (y).
       */
  680 void DES_bs_crypt_25(int keys_count)
  681 {
  682 #if DES_bs_mt
  683     int t, n = (keys_count + (DES_BS_DEPTH - 1)) / DES_BS_DEPTH;
  684 #endif
  685 
  686 #ifdef _OPENMP
  687 #pragma omp parallel for default(none) private(t) shared(n, DES_bs_all_p, keys_count)
  688 #endif
  689     for_each_t(n) {
  690 #if DES_BS_EXPAND
  691         DES_bs_vector *k;
  692 #else
  693         ARCH_WORD **k;
  694 #endif
  695         int iterations, rounds_and_swapped;
  696 #if DES_BS_VECTOR_LOOPS
  697         int depth;
  698 #endif
  699 
          /* Keys are finalized lazily; that code jumps back to body. */
  700         if (DES_bs_all.keys_changed)
  701             goto finalize_keys;
  702 
  703 body:
  704 #if DES_bs_mt
  705         DES_bs_set_salt_for_thread(t, DES_bs_all_by_tnum(-1).salt);
  706 #endif
  707 
          /* Zero all 64 block vectors. */
  708         {
  709             vtype zero = vzero;
  710             DES_bs_clear_block
  711         }
  712 
  713 #if DES_BS_EXPAND
  714         k = DES_bs_all.KS.v;
  715 #else
  716         k = DES_bs_all.KS.p;
  717 #endif
          /* 8 start/swap pairs per pass; values of 0x100 and above mark a swapped pass. */
  718         rounds_and_swapped = 8;
  719         iterations = 25;
  720 
          /* Uses key entries k[0..47]; outputs go to block vectors 32..63. */
  721 start:
  722         for_each_depth()
  723         s1(x(0), x(1), x(2), x(3), x(4), x(5),
  724             z(40), z(48), z(54), z(62));
  725         for_each_depth()
  726         s2(x(6), x(7), x(8), x(9), x(10), x(11),
  727             z(44), z(59), z(33), z(49));
  728         for_each_depth()
  729         s3(y(7, 12), y(8, 13), y(9, 14),
  730             y(10, 15), y(11, 16), y(12, 17),
  731             z(55), z(47), z(61), z(37));
  732         for_each_depth()
  733         s4(y(11, 18), y(12, 19), y(13, 20),
  734             y(14, 21), y(15, 22), y(16, 23),
  735             z(57), z(51), z(41), z(32));
  736         for_each_depth()
  737         s5(x(24), x(25), x(26), x(27), x(28), x(29),
  738             z(39), z(45), z(56), z(34));
  739         for_each_depth()
  740         s6(x(30), x(31), x(32), x(33), x(34), x(35),
  741             z(35), z(60), z(42), z(50));
  742         for_each_depth()
  743         s7(y(23, 36), y(24, 37), y(25, 38),
  744             y(26, 39), y(27, 40), y(28, 41),
  745             z(63), z(43), z(53), z(38));
  746         for_each_depth()
  747         s8(y(27, 42), y(28, 43), y(29, 44),
  748             y(30, 45), y(31, 46), y(0, 47),
  749             z(36), z(58), z(46), z(52));
  750 
          /* A swapped pass ends here, after its eighth "start" section. */
  751         if (rounds_and_swapped == 0x100) goto next;
  752 
          /* Uses key entries k[48..95]; outputs go to block vectors 0..31. */
  753 swap:
  754         for_each_depth()
  755         s1(x(48), x(49), x(50), x(51), x(52), x(53),
  756             z(8), z(16), z(22), z(30));
  757         for_each_depth()
  758         s2(x(54), x(55), x(56), x(57), x(58), x(59),
  759             z(12), z(27), z(1), z(17));
  760         for_each_depth()
  761         s3(y(39, 60), y(40, 61), y(41, 62),
  762             y(42, 63), y(43, 64), y(44, 65),
  763             z(23), z(15), z(29), z(5));
  764         for_each_depth()
  765         s4(y(43, 66), y(44, 67), y(45, 68),
  766             y(46, 69), y(47, 70), y(48, 71),
  767             z(25), z(19), z(9), z(0));
  768         for_each_depth()
  769         s5(x(72), x(73), x(74), x(75), x(76), x(77),
  770             z(7), z(13), z(24), z(2));
  771         for_each_depth()
  772         s6(x(78), x(79), x(80), x(81), x(82), x(83),
  773             z(3), z(28), z(10), z(18));
  774         for_each_depth()
  775         s7(y(55, 84), y(56, 85), y(57, 86),
  776             y(58, 87), y(59, 88), y(60, 89),
  777             z(31), z(11), z(21), z(6));
  778         for_each_depth()
  779         s8(y(59, 90), y(60, 91), y(61, 92),
  780             y(62, 93), y(63, 94), y(32, 95),
  781             z(4), z(26), z(14), z(20));
  782 
          /* Step past the two 48-entry key sets used by this start/swap pair. */
  783         k += 96;
  784 
  785         if (--rounds_and_swapped) goto start;
          /*
           * End of a normal pass: k has advanced by 0x300.  Rewinding by
           * 0x300 + 48 makes the "swap" section, which indexes k[48..95],
           * begin with the first key set.  0x108 marks the next pass as
           * swapped; it counts down to 0x100, where "start" jumps to next.
           */
  786         k -= (0x300 + 48);
  787         rounds_and_swapped = 0x108;
  788         if (--iterations) goto swap;
  789 #if DES_bs_mt
  790         continue;
  791 #else
  792         return;
  793 #endif
  794 
          /*
           * End of a swapped pass: rewind k to the start of the key schedule
           * and begin a normal pass.  iterations is not tested here: starting
           * from 25, it is odd after every swapped pass, so it can only reach
           * zero at the test that follows a normal pass.
           */
  795 next:
  796         k -= (0x300 - 48);
  797         rounds_and_swapped = 8;
  798         iterations--;
  799         goto start;
  800 
          /* Clear the flag, rebuild the key vectors, then resume at body. */
  801 finalize_keys:
  802         DES_bs_all.keys_changed = 0;
  803 #if DES_bs_mt
  804         DES_bs_finalize_keys(t);
  805 #else
  806         DES_bs_finalize_keys();
  807 #endif
  808         goto body;
  809     }
  810 }
  811 
      /*
       * Variable-iteration variant of the main loop: iterations is set to
       * count.  keys_count is used only in DES_bs_mt builds, to derive n, the
       * number of DES_BS_DEPTH-sized key groups iterated by for_each_t().
       *
       * Per group: finalize the keys first if DES_bs_all.keys_changed is set,
       * (DES_bs_mt only) apply the salt stored in DES_bs_all_by_tnum(-1), zero
       * the 64 block vectors, then run the S-box passes below.
       *
       * One pass is 8 start/swap pairs, i.e. 16 sets of eight S-box calls, and
       * walks k across 8 * 96 = 0x300 key entries.  Passes alternate between
       * entering at "start" and entering at "swap".  Here all eight S-boxes
       * take their inputs through the E pointer table (x), and the iteration
       * count is tested at the end of both kinds of pass.
       *
       * NOTE(review): count is only ever pre-decremented and tested for
       * zero, so a count of 0 or less would not terminate promptly -
       * presumably callers always pass count >= 1; confirm.
       */
  812 void DES_bs_crypt(int count, int keys_count)
  813 {
  814 #if DES_bs_mt
  815     int t, n = (keys_count + (DES_BS_DEPTH - 1)) / DES_BS_DEPTH;
  816 #endif
  817 
  818 #ifdef _OPENMP
  819 #pragma omp parallel for default(none) private(t) shared(n, DES_bs_all_p, count, keys_count)
  820 #endif
  821     for_each_t(n) {
  822 #if DES_BS_EXPAND
  823         DES_bs_vector *k;
  824 #else
  825         ARCH_WORD **k;
  826 #endif
  827         int iterations, rounds_and_swapped;
  828 #if DES_BS_VECTOR_LOOPS
  829         int depth;
  830 #endif
  831 
          /* Keys are finalized lazily; that code jumps back to body. */
  832         if (DES_bs_all.keys_changed)
  833             goto finalize_keys;
  834 
  835 body:
  836 #if DES_bs_mt
  837         DES_bs_set_salt_for_thread(t, DES_bs_all_by_tnum(-1).salt);
  838 #endif
  839 
          /* Zero all 64 block vectors. */
  840         {
  841             vtype zero = vzero;
  842             DES_bs_clear_block
  843         }
  844 
  845 #if DES_BS_EXPAND
  846         k = DES_bs_all.KS.v;
  847 #else
  848         k = DES_bs_all.KS.p;
  849 #endif
          /* 8 start/swap pairs per pass; values of 0x100 and above mark a swapped pass. */
  850         rounds_and_swapped = 8;
  851         iterations = count;
  852 
          /* Uses key entries k[0..47]; outputs go to block vectors 32..63. */
  853 start:
  854         for_each_depth()
  855         s1(x(0), x(1), x(2), x(3), x(4), x(5),
  856             z(40), z(48), z(54), z(62));
  857         for_each_depth()
  858         s2(x(6), x(7), x(8), x(9), x(10), x(11),
  859             z(44), z(59), z(33), z(49));
  860         for_each_depth()
  861         s3(x(12), x(13), x(14), x(15), x(16), x(17),
  862             z(55), z(47), z(61), z(37));
  863         for_each_depth()
  864         s4(x(18), x(19), x(20), x(21), x(22), x(23),
  865             z(57), z(51), z(41), z(32));
  866         for_each_depth()
  867         s5(x(24), x(25), x(26), x(27), x(28), x(29),
  868             z(39), z(45), z(56), z(34));
  869         for_each_depth()
  870         s6(x(30), x(31), x(32), x(33), x(34), x(35),
  871             z(35), z(60), z(42), z(50));
  872         for_each_depth()
  873         s7(x(36), x(37), x(38), x(39), x(40), x(41),
  874             z(63), z(43), z(53), z(38));
  875         for_each_depth()
  876         s8(x(42), x(43), x(44), x(45), x(46), x(47),
  877             z(36), z(58), z(46), z(52));
  878 
          /* A swapped pass ends here, after its eighth "start" section. */
  879         if (rounds_and_swapped == 0x100) goto next;
  880 
          /* Uses key entries k[48..95]; outputs go to block vectors 0..31. */
  881 swap:
  882         for_each_depth()
  883         s1(x(48), x(49), x(50), x(51), x(52), x(53),
  884             z(8), z(16), z(22), z(30));
  885         for_each_depth()
  886         s2(x(54), x(55), x(56), x(57), x(58), x(59),
  887             z(12), z(27), z(1), z(17));
  888         for_each_depth()
  889         s3(x(60), x(61), x(62), x(63), x(64), x(65),
  890             z(23), z(15), z(29), z(5));
  891         for_each_depth()
  892         s4(x(66), x(67), x(68), x(69), x(70), x(71),
  893             z(25), z(19), z(9), z(0));
  894         for_each_depth()
  895         s5(x(72), x(73), x(74), x(75), x(76), x(77),
  896             z(7), z(13), z(24), z(2));
  897         for_each_depth()
  898         s6(x(78), x(79), x(80), x(81), x(82), x(83),
  899             z(3), z(28), z(10), z(18));
  900         for_each_depth()
  901         s7(x(84), x(85), x(86), x(87), x(88), x(89),
  902             z(31), z(11), z(21), z(6));
  903         for_each_depth()
  904         s8(x(90), x(91), x(92), x(93), x(94), x(95),
  905             z(4), z(26), z(14), z(20));
  906 
          /* Step past the two 48-entry key sets used by this start/swap pair. */
  907         k += 96;
  908 
  909         if (--rounds_and_swapped) goto start;
          /*
           * End of a normal pass: k has advanced by 0x300.  Rewinding by
           * 0x300 + 48 makes the "swap" section, which indexes k[48..95],
           * begin with the first key set.  0x108 marks the next pass as
           * swapped; it counts down to 0x100, where "start" jumps to next.
           */
  910         k -= (0x300 + 48);
  911         rounds_and_swapped = 0x108;
  912         if (--iterations) goto swap;
  913 #if DES_bs_mt
  914         continue;
  915 #else
  916         return;
  917 #endif
  918 
          /*
           * End of a swapped pass: rewind k to the start of the key schedule
           * and, if iterations remain, begin a normal pass.
           */
  919 next:
  920         k -= (0x300 - 48);
  921         rounds_and_swapped = 8;
  922         if (--iterations) goto start;
  923 #if DES_bs_mt
  924         continue;
  925 #else
  926         return;
  927 #endif
  928 
          /* Clear the flag, rebuild the key vectors, then resume at body. */
  929 finalize_keys:
  930         DES_bs_all.keys_changed = 0;
  931 #if DES_bs_mt
  932         DES_bs_finalize_keys(t);
  933 #else
  934         DES_bs_finalize_keys();
  935 #endif
  936         goto body;
  937     }
  938 }
  939 
  940 #undef x
  941 
      /*
       * LM variant of key finalization, writing key vectors through kp
       * starting at DES_bs_all.K[0].
       *
       * For each of the 7 values of ic, LOAD_V reads eight vectors from
       * xkeys.v[ic] and FINALIZE_NEXT_KEY_BIT_0..7 emit one output vector per
       * bit position 0..7, i.e. 7 * 8 = 56 vectors.  With
       * DES_BS_VECTOR_LOOPS_K the conversion is repeated for every depth
       * index.  Unlike DES_bs_finalize_keys, there is no DES_BS_EXPAND copy
       * step here.
       *
       * t (DES_bs_mt builds only) is not referenced by name in this body;
       * presumably the DES_bs_all macro uses it to select the per-thread
       * instance - confirm against DES_bs.h.
       */
  942 #if DES_bs_mt
  943 static MAYBE_INLINE void DES_bs_finalize_keys_LM(int t)
  944 #else
  945 static MAYBE_INLINE void DES_bs_finalize_keys_LM(void)
  946 #endif
  947 {
  948 #if DES_BS_VECTOR_LOOPS_K
  949     int depth;
  950 #endif
  951 
  952     for_each_depth_k() {
  953         DES_bs_vector *kp = (DES_bs_vector *)&DES_bs_all.K[0] DEPTH_K;
  954         int ic;
  955         for (ic = 0; ic < 7; ic++) {
  956             DES_bs_vector *vp =
  957                 (DES_bs_vector *)&DES_bs_all.xkeys.v[ic][0] DEPTH_K;
  958             LOAD_V
  959             FINALIZE_NEXT_KEY_BIT_0
  960             FINALIZE_NEXT_KEY_BIT_1
  961             FINALIZE_NEXT_KEY_BIT_2
  962             FINALIZE_NEXT_KEY_BIT_3
  963             FINALIZE_NEXT_KEY_BIT_4
  964             FINALIZE_NEXT_KEY_BIT_5
  965             FINALIZE_NEXT_KEY_BIT_6
  966             FINALIZE_NEXT_KEY_BIT_7
  967         }
  968     }
  969 }
  970 
      /*
       * Remove the i386 LOAD_V helper macros so that v1..v7 are ordinary
       * identifiers again in the code that follows.
       */
  971 #undef v1
  972 #undef v2
  973 #undef v3
  974 #undef v5
  975 #undef v6
  976 #undef v7
  977 
      /*
       * From here on key accesses are always indexed, with [depth] or [0],
       * regardless of DES_BS_EXPAND: the LM code below sets k to the pointer
       * table DES_bs_all.KS.p.
       */
  978 #undef kd
  979 #if DES_BS_VECTOR_LOOPS
  980 #define kd              [depth]
  981 #else
  982 #define kd              [0]
  983 #endif
  984 
      /*
       * Bitslice DES encryption loop for the LM variant (per the _LM suffix).
       *
       * pcount: points to the number of candidate keys; it is read once and
       *         never written by this function.
       * salt:   not referenced anywhere in this function body.
       *
       * Returns the key count read from *pcount, unchanged.
       *
       * For every pass of the for_each_t(n) loop the function loads a fixed
       * 64-entry block built from all-zero and all-one vectors, calls
       * DES_bs_finalize_keys_LM(), and then runs 8 iterations of a loop that
       * contains two s1..s8 S-box passes each (16 passes in total).
       */
  985 int DES_bs_crypt_LM(int *pcount, struct db_salt *salt)
  986 {
  987     int keys_count = *pcount;
  988 #if DES_bs_mt
          /* n = keys_count / DES_BS_DEPTH, rounded up. */
  989     int t, n = (keys_count + (DES_BS_DEPTH - 1)) / DES_BS_DEPTH;
  990 #endif
  991 
          /*
           * With OpenMP the loop below is a "parallel for": t is private to each
           * thread, while n, DES_bs_all_p and keys_count are shared.
           * NOTE(review): for_each_t() is defined elsewhere; presumably it is a
           * for loop over t in [0, n) in DES_bs_mt builds - confirm.
           */
  992 #ifdef _OPENMP
  993 #pragma omp parallel for default(none) private(t) shared(n, DES_bs_all_p, keys_count)
  994 #endif
  995     for_each_t(n) {
  996         ARCH_WORD **k;
  997         int rounds;
  998 #if DES_BS_VECTOR_LOOPS
  999         int depth;
 1000 #endif
 1001 
              /*
               * Load the constant starting block, eight entries per call, at
               * offsets 0, 8, ..., 56.  z is the all-zero vector and o the
               * all-one vector.  The local z is an object name; the z(...) used
               * in the S-box calls below is a separate, function-like use.
               * NOTE(review): presumably this bit pattern is the fixed LM
               * plaintext after the initial permutation - confirm.
               */
 1002         {
 1003             vtype z = vzero, o = vones;
 1004             DES_bs_set_block_8(0, z, z, z, z, z, z, z, z);
 1005             DES_bs_set_block_8(8, o, o, o, z, o, z, z, z);
 1006             DES_bs_set_block_8(16, z, z, z, z, z, z, z, o);
 1007             DES_bs_set_block_8(24, z, z, o, z, z, o, o, o);
 1008             DES_bs_set_block_8(32, z, z, z, o, z, o, o, o);
 1009             DES_bs_set_block_8(40, z, z, z, z, z, o, z, z);
 1010             DES_bs_set_block_8(48, o, o, z, z, z, z, o, z);
 1011             DES_bs_set_block_8(56, o, z, o, z, o, o, o, o);
 1012         }
 1013 
 1014 #if DES_bs_mt
 1015         DES_bs_finalize_keys_LM(t);
 1016 #else
 1017         DES_bs_finalize_keys_LM();
 1018 #endif
 1019 
              /*
               * k starts at DES_bs_all.KS.p and advances by 96 per iteration,
               * matching the 0..95 range of the second y() argument below.
               * 8 iterations x 2 passes = 16 S-box passes.
               */
 1020         k = DES_bs_all.KS.p;
 1021         rounds = 8;
 1022 
 1023         do {
                  /*
                   * First pass: the first y() argument ranges over 0..31 and the
                   * z() outputs over 32..63.
                   * NOTE(review): for_each_depth(), y() and z() are defined
                   * elsewhere; presumably for_each_depth() loops over "depth"
                   * when DES_BS_VECTOR_LOOPS is nonzero - confirm.
                   */
 1024             for_each_depth()
 1025             s1(y(31, 0), y(0, 1), y(1, 2),
 1026                 y(2, 3), y(3, 4), y(4, 5),
 1027                 z(40), z(48), z(54), z(62));
 1028             for_each_depth()
 1029             s2(y(3, 6), y(4, 7), y(5, 8),
 1030                 y(6, 9), y(7, 10), y(8, 11),
 1031                 z(44), z(59), z(33), z(49));
 1032             for_each_depth()
 1033             s3(y(7, 12), y(8, 13), y(9, 14),
 1034                 y(10, 15), y(11, 16), y(12, 17),
 1035                 z(55), z(47), z(61), z(37));
 1036             for_each_depth()
 1037             s4(y(11, 18), y(12, 19), y(13, 20),
 1038                 y(14, 21), y(15, 22), y(16, 23),
 1039                 z(57), z(51), z(41), z(32));
 1040             for_each_depth()
 1041             s5(y(15, 24), y(16, 25), y(17, 26),
 1042                 y(18, 27), y(19, 28), y(20, 29),
 1043                 z(39), z(45), z(56), z(34));
 1044             for_each_depth()
 1045             s6(y(19, 30), y(20, 31), y(21, 32),
 1046                 y(22, 33), y(23, 34), y(24, 35),
 1047                 z(35), z(60), z(42), z(50));
 1048             for_each_depth()
 1049             s7(y(23, 36), y(24, 37), y(25, 38),
 1050                 y(26, 39), y(27, 40), y(28, 41),
 1051                 z(63), z(43), z(53), z(38));
 1052             for_each_depth()
 1053             s8(y(27, 42), y(28, 43), y(29, 44),
 1054                 y(30, 45), y(31, 46), y(0, 47),
 1055                 z(36), z(58), z(46), z(52));
 1056 
                  /*
                   * Second pass: roles of the two halves are swapped - the first
                   * y() argument ranges over 32..63 and the z() outputs over
                   * 0..31.
                   */
 1057             for_each_depth()
 1058             s1(y(63, 48), y(32, 49), y(33, 50),
 1059                 y(34, 51), y(35, 52), y(36, 53),
 1060                 z(8), z(16), z(22), z(30));
 1061             for_each_depth()
 1062             s2(y(35, 54), y(36, 55), y(37, 56),
 1063                 y(38, 57), y(39, 58), y(40, 59),
 1064                 z(12), z(27), z(1), z(17));
 1065             for_each_depth()
 1066             s3(y(39, 60), y(40, 61), y(41, 62),
 1067                 y(42, 63), y(43, 64), y(44, 65),
 1068                 z(23), z(15), z(29), z(5));
 1069             for_each_depth()
 1070             s4(y(43, 66), y(44, 67), y(45, 68),
 1071                 y(46, 69), y(47, 70), y(48, 71),
 1072                 z(25), z(19), z(9), z(0));
 1073             for_each_depth()
 1074             s5(y(47, 72), y(48, 73), y(49, 74),
 1075                 y(50, 75), y(51, 76), y(52, 77),
 1076                 z(7), z(13), z(24), z(2));
 1077             for_each_depth()
 1078             s6(y(51, 78), y(52, 79), y(53, 80),
 1079                 y(54, 81), y(55, 82), y(56, 83),
 1080                 z(3), z(28), z(10), z(18));
 1081             for_each_depth()
 1082             s7(y(55, 84), y(56, 85), y(57, 86),
 1083                 y(58, 87), y(59, 88), y(60, 89),
 1084                 z(31), z(11), z(21), z(6));
 1085             for_each_depth()
 1086             s8(y(59, 90), y(60, 91), y(61, 92),
 1087                 y(62, 93), y(63, 94), y(32, 95),
 1088                 z(4), z(26), z(14), z(20));
 1089 
                  /* Step past the 96 key entries used by the two passes above. */
 1090             k += 96;
 1091         } while (--rounds);
 1092     }
 1093 
          /* The count is passed back exactly as it was received. */
 1094     return keys_count;
 1095 }
 1096 #endif