bn_exp.c (openssl-1.1.1o) | : | bn_exp.c (openssl-1.1.1p) | ||
---|---|---|---|---|
skipping to change at line 903 | skipping to change at line 903 | |||
top *= 2; | top *= 2; | |||
/* back to 32-bit domain */ | /* back to 32-bit domain */ | |||
tmp.top = top; | tmp.top = top; | |||
bn_correct_top(&tmp); | bn_correct_top(&tmp); | |||
OPENSSL_cleanse(np, top * sizeof(BN_ULONG)); | OPENSSL_cleanse(np, top * sizeof(BN_ULONG)); | |||
} else | } else | |||
#endif | #endif | |||
#if defined(OPENSSL_BN_ASM_MONT5) | #if defined(OPENSSL_BN_ASM_MONT5) | |||
if (window == 5 && top > 1) { | if (window == 5 && top > 1) { | |||
/* | /* | |||
* This optimization uses ideas from http://eprint.iacr.org/2011/239, | * This optimization uses ideas from https://eprint.iacr.org/2011/239, | |||
* specifically optimization of cache-timing attack countermeasures | * specifically optimization of cache-timing attack countermeasures, | |||
* and pre-computation optimization. | * pre-computation optimization, and Almost Montgomery Multiplication. | |||
*/ | * | |||
* The paper discusses a 4-bit window to optimize 512-bit modular | ||||
/* | * exponentiation, used in RSA-1024 with CRT, but RSA-1024 is no longer | |||
* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as | * important. | |||
* 512-bit RSA is hardly relevant, we omit it to spare size... | * | |||
* |bn_mul_mont_gather5| and |bn_power5| implement the "almost" | ||||
* reduction variant, so the values here may not be fully reduced. | ||||
* They are bounded by R (i.e. they fit in |top| words), not |m|. | ||||
* Additionally, we pass these "almost" reduced inputs into | ||||
* |bn_mul_mont|, which implements the normal reduction variant. | ||||
* Given those inputs, |bn_mul_mont| may not give reduced | ||||
* output, but it will still produce "almost" reduced output. | ||||
*/ | */ | |||
void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, | void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, | |||
const void *table, const BN_ULONG *np, | const void *table, const BN_ULONG *np, | |||
const BN_ULONG *n0, int num, int power); | const BN_ULONG *n0, int num, int power); | |||
void bn_scatter5(const BN_ULONG *inp, size_t num, | void bn_scatter5(const BN_ULONG *inp, size_t num, | |||
void *table, size_t power); | void *table, size_t power); | |||
void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power); | void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power); | |||
void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, | void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, | |||
const void *table, const BN_ULONG *np, | const void *table, const BN_ULONG *np, | |||
const BN_ULONG *n0, int num, int power); | const BN_ULONG *n0, int num, int power); | |||
int bn_get_bits5(const BN_ULONG *ap, int off); | int bn_get_bits5(const BN_ULONG *ap, int off); | |||
int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap, | ||||
const BN_ULONG *not_used, const BN_ULONG *np, | ||||
const BN_ULONG *n0, int num); | ||||
BN_ULONG *n0 = mont->n0, *np; | BN_ULONG *n0 = mont->n0, *np; | |||
/* | /* | |||
* BN_to_montgomery can contaminate words above .top [in | * BN_to_montgomery can contaminate words above .top [in | |||
* BN_DEBUG[_DEBUG] build]... | * BN_DEBUG[_DEBUG] build]... | |||
*/ | */ | |||
for (i = am.top; i < top; i++) | for (i = am.top; i < top; i++) | |||
am.d[i] = 0; | am.d[i] = 0; | |||
for (i = tmp.top; i < top; i++) | for (i = tmp.top; i < top; i++) | |||
skipping to change at line 1013 | skipping to change at line 1017 | |||
bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, | bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, | |||
bn_get_bits5(p->d, bits -= 5)); | bn_get_bits5(p->d, bits -= 5)); | |||
} | } | |||
} else { | } else { | |||
while (bits > 0) { | while (bits > 0) { | |||
bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, | bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, | |||
bn_get_bits5(p->d, bits -= 5)); | bn_get_bits5(p->d, bits -= 5)); | |||
} | } | |||
} | } | |||
ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np, n0, top); | ||||
tmp.top = top; | tmp.top = top; | |||
bn_correct_top(&tmp); | /* | |||
if (ret) { | * The result is now in |tmp| in Montgomery form, but it may not be | |||
if (!BN_copy(rr, &tmp)) | * fully reduced. This is within bounds for |BN_from_montgomery| | |||
ret = 0; | * (tmp < R <= m*R) so it will, when converting from Montgomery form, | |||
goto err; /* non-zero ret means it's not error */ | * produce a fully reduced result. | |||
} | * | |||
* This differs from Figure 2 of the paper, which uses AMM(h, 1) to | ||||
* convert from Montgomery form with unreduced output, followed by an | ||||
* extra reduction step. In the paper's terminology, we replace | ||||
* steps 9 and 10 with MM(h, 1). | ||||
*/ | ||||
} else | } else | |||
#endif | #endif | |||
{ | { | |||
if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, window)) | if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, window)) | |||
goto err; | goto err; | |||
if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, window)) | if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, window)) | |||
goto err; | goto err; | |||
/* | /* | |||
* If the window size is greater than 1, then calculate | * If the window size is greater than 1, then calculate | |||
End of changes. 4 change blocks. | ||||
18 lines changed or deleted | 26 lines changed or added |