speck.c (n2n-2.8) | : | speck.c (n2n-3.0) | ||
---|---|---|---|---|
// cipher SPECK -- 128 bit block size -- 256 bit key size -- CTR mode | /** | |||
* (C) 2007-21 - ntop.org and contributors | ||||
* | ||||
* This program is free software; you can redistribute it and/or modify | ||||
* it under the terms of the GNU General Public License as published by | ||||
* the Free Software Foundation; either version 3 of the License, or | ||||
* (at your option) any later version. | ||||
* | ||||
* This program is distributed in the hope that it will be useful, | ||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
* GNU General Public License for more details. | ||||
* | ||||
* You should have received a copy of the GNU General Public License | ||||
* along with this program; if not, see <http://www.gnu.org/licenses/> | ||||
* | ||||
*/ | ||||
// cipher SPECK -- 128 bit block size -- 128 and 256 bit key size -- CTR mode | ||||
// taken from (and modified: removed pure crypto-stream generation and separated key expansion) | // taken from (and modified: removed pure crypto-stream generation and separated key expansion) | |||
// https://github.com/nsacyber/simon-speck-supercop/blob/master/crypto_stream/speck128256ctr/ | // https://github.com/nsacyber/simon-speck-supercop/blob/master/crypto_stream/speck128256ctr/ | |||
#include <stdlib.h> | ||||
#include "portable_endian.h" | ||||
#include "speck.h" | #include "speck.h" | |||
#if defined (__AVX2__) // AVX support ------------------------------------------ | #if defined (__AVX512F__) // AVX512 support ----------------------------------- | |||
---------- | ----------------------------------- | |||
#define LCS(x,r) (((x)<<r)|((x)>>(64-r))) | ||||
#define RCS(x,r) (((x)>>r)|((x)<<(64-r))) | ||||
#define SET _mm512_set_epi64 | ||||
#define XOR _mm512_xor_si512 | ||||
#define ADD _mm512_add_epi64 | ||||
#define AND _mm512_and_si512 | ||||
#define ROL(X,r) (_mm512_rol_epi64(X,r)) | ||||
#define ROR(X,r) (_mm512_ror_epi64(X,r)) | ||||
#define _q8 SET(0x7LL,0x3LL,0x6LL,0x2LL,0x5LL,0x1LL,0x4LL,0x0LL) | ||||
#define _eight SET(0x8LL,0x8LL,0x8LL,0x8LL,0x8LL,0x8LL,0x8LL,0x8LL) | ||||
#define SET1(X,c) (X=SET(c,c,c,c,c,c,c,c)) | ||||
#define SET8(X,c) (X=SET(c,c,c,c,c,c,c,c), X=ADD(X,_q8)) | ||||
#define LOW _mm512_unpacklo_epi64 | ||||
#define HIGH _mm512_unpackhi_epi64 | ||||
// 512-bit load without an alignment requirement: LD is applied to plain byte
// pointers (the caller's input buffer and an unsigned char stack array), which
// are not guaranteed to sit on a 64-byte boundary; ST likewise uses the
// unaligned store
#define LD(ip) (_mm512_loadu_si512((const void *)(ip)))
#define ST(ip,X) _mm512_storeu_si512((void *)(ip),X) | ||||
#define STORE(out,X,Y) (ST(out,LOW(Y,X)), ST(out+64,HIGH(Y,X))) | ||||
#define XOR_STORE(in,out,X,Y) (ST(out,XOR(LD(in),LOW(Y,X))), ST(out+64,XOR(LD(in | ||||
+64),HIGH(Y,X)))) | ||||
#define Rx8(X,Y,k) (X[0]=XOR(ADD(ROR(X[0],8),Y[0]),k), \ | ||||
Y[0]=XOR(ROL(Y[0],3),X[0])) | ||||
#define Rx16(X,Y,k) (X[0]=XOR(ADD(ROR(X[0],8),Y[0]),k), X[1]=XOR(ADD(ROR(X[1],8) | ||||
,Y[1]),k), \ | ||||
Y[0]=XOR(ROL(Y[0],3),X[0]), Y[1]=XOR(ROL(Y[1],3),X[1])) | ||||
#define Rx24(X,Y,k) (X[0]=XOR(ADD(ROR(X[0],8),Y[0]),k), X[1]=XOR(ADD(ROR(X[1],8) | ||||
,Y[1]),k), X[2]=XOR(ADD(ROR(X[2],8),Y[2]),k), \ | ||||
Y[0]=XOR(ROL(Y[0],3),X[0]), Y[1]=XOR(ROL(Y[1],3),X[1]), Y[2 | ||||
]=XOR(ROL(Y[2],3),X[2])) | ||||
#define Rx32(X,Y,k) (X[0]=XOR(ADD(ROR(X[0],8),Y[0]),k), X[1]=XOR(ADD(ROR(X[1],8) | ||||
,Y[1]),k), \ | ||||
X[2]=XOR(ADD(ROR(X[2],8),Y[2]),k), X[3]=XOR(ADD(ROR(X[3],8) | ||||
,Y[3]),k), \ | ||||
Y[0]=XOR(ROL(Y[0],3),X[0]), Y[1]=XOR(ROL(Y[1],3),X[1]), | ||||
\ | ||||
Y[2]=XOR(ROL(Y[2],3),X[2]), Y[3]=XOR(ROL(Y[3],3),X[3])) | ||||
#define Rx1(x,y,k) (x[0]=RCS(x[0],8), x[0]+=y[0], x[0]^=k, y[0]=LCS(y[0],3), y[ | ||||
0]^=x[0]) | ||||
#define Rx1b(x,y,k) (x=RCS(x,8), x+=y, x^=k, y=LCS(y,3), y^=x) | ||||
#define Rx2(x,y,k) (x[0]=RCS(x[0],8), x[1]=RCS(x[1],8), x[0]+=y[0], x[1]+=y[1], | ||||
\ | ||||
x[0]^=k, x[1]^=k, y[0]=LCS(y[0],3), y[1]=LCS(y[1],3), y[0]^ | ||||
=x[0], y[1]^=x[1]) | ||||
#define Encrypt_128(X,Y,k,n) (Rx##n(X,Y,k[0]), Rx##n(X,Y,k[1]), Rx##n(X,Y,k[2] | ||||
), Rx##n(X,Y,k[3]), Rx##n(X,Y,k[4]), Rx##n(X,Y,k[5]), Rx##n(X,Y,k[6]), Rx## | ||||
n(X,Y,k[7]), \ | ||||
Rx##n(X,Y,k[8]), Rx##n(X,Y,k[9]), Rx##n(X,Y,k[10 | ||||
]), Rx##n(X,Y,k[11]), Rx##n(X,Y,k[12]), Rx##n(X,Y,k[13]), Rx##n(X,Y,k[14]), Rx## | ||||
n(X,Y,k[15]), \ | ||||
Rx##n(X,Y,k[16]), Rx##n(X,Y,k[17]), Rx##n(X,Y,k[18 | ||||
]), Rx##n(X,Y,k[19]), Rx##n(X,Y,k[20]), Rx##n(X,Y,k[21]), Rx##n(X,Y,k[22]), Rx## | ||||
n(X,Y,k[23]), \ | ||||
Rx##n(X,Y,k[24]), Rx##n(X,Y,k[25]), Rx##n(X,Y,k[26 | ||||
]), Rx##n(X,Y,k[27]), Rx##n(X,Y,k[28]), Rx##n(X,Y,k[29]), Rx##n(X,Y,k[30]), Rx## | ||||
n(X,Y,k[31])) | ||||
#define Encrypt_256(X,Y,k,n) (Encrypt_128(X,Y,k,n), \ | ||||
Rx##n(X,Y,k[32]), Rx##n(X,Y,k[33])) | ||||
#define RK(X,Y,k,key,i) (SET1(k[i],Y), key[i]=Y, X=RCS(X,8), X+=Y, X^=i, Y=LCS | ||||
(Y,3), Y^=X) | ||||
#define EK(A,B,C,D,k,key) (RK(B,A,k,key,0), RK(C,A,k,key,1), RK(D,A,k,key,2), | ||||
RK(B,A,k,key,3), RK(C,A,k,key,4), RK(D,A,k,key,5), RK(B,A,k,key,6), \ | ||||
RK(C,A,k,key,7), RK(D,A,k,key,8), RK(B,A,k,key,9), | ||||
RK(C,A,k,key,10), RK(D,A,k,key,11), RK(B,A,k,key,12), RK(C,A,k,key,13), \ | ||||
RK(D,A,k,key,14), RK(B,A,k,key,15), RK(C,A,k,key,16), | ||||
RK(D,A,k,key,17), RK(B,A,k,key,18), RK(C,A,k,key,19), RK(D,A,k,key,20), \ | ||||
RK(B,A,k,key,21), RK(C,A,k,key,22), RK(D,A,k,key,23), | ||||
RK(B,A,k,key,24), RK(C,A,k,key,25), RK(D,A,k,key,26), RK(B,A,k,key,27), \ | ||||
RK(C,A,k,key,28), RK(D,A,k,key,29), RK(B,A,k,key,30), | ||||
RK(C,A,k,key,31), RK(D,A,k,key,32), RK(B,A,k,key,33)) | ||||
#define Encrypt_Dispatcher(keysize) | ||||
\ | ||||
u64 x[2], y[2]; | ||||
\ | ||||
u512 X[4], Y[4]; | ||||
\ | ||||
unsigned char block1024[128]; | ||||
\ | ||||
\ | ||||
if(numbytes == 16) { | ||||
\ | ||||
x[0] = nonce[1]; y[0] = nonce[0]; nonce[0]++; | ||||
\ | ||||
Encrypt_##keysize(x, y, ctx->key, 1); | ||||
\ | ||||
((u64 *)out)[1] = x[0]; ((u64 *)out)[0] = y[0]; | ||||
\ | ||||
return 0; | ||||
\ | ||||
} | ||||
\ | ||||
\ | ||||
if(numbytes == 32) { | ||||
\ | ||||
x[0] = nonce[1]; y[0] = nonce[0]; nonce[0]++; | ||||
\ | ||||
x[1] = nonce[1]; y[1] = nonce[0]; nonce[0]++; | ||||
\ | ||||
Encrypt_##keysize(x, y, ctx->key, 2); | ||||
\ | ||||
((u64 *)out)[1] = x[0] ^ ((u64 *)in)[1]; ((u64 *)out)[0] = y[0] ^ ((u64 | ||||
*)in)[0]; \ | ||||
((u64 *)out)[3] = x[1] ^ ((u64 *)in)[3]; ((u64 *)out)[2] = y[1] ^ ((u64 | ||||
*)in)[2]; \ | ||||
return 0; | ||||
\ | ||||
} | ||||
\ | ||||
\ | ||||
if(numbytes == 64) { | ||||
\ | ||||
SET1(X[0], nonce[1]); | ||||
\ | ||||
SET8(Y[0], nonce[0]); | ||||
\ | ||||
Encrypt_##keysize(X, Y, ctx->rk, 8); | ||||
\ | ||||
nonce[0] += (numbytes >> 4); | ||||
\ | ||||
memcpy(block1024, in, 64); | ||||
\ | ||||
XOR_STORE(block1024, block1024, X[0], Y[0]); | ||||
\ | ||||
memcpy(out, block1024, 64); | ||||
\ | ||||
return 0; | ||||
\ | ||||
} | ||||
\ | ||||
\ | ||||
SET1(X[0], nonce[1]); SET8(Y[0], nonce[0]); | ||||
\ | ||||
\ | ||||
if(numbytes == 128) | ||||
\ | ||||
Encrypt_##keysize(X, Y, ctx->rk, 8); | ||||
\ | ||||
else { | ||||
\ | ||||
X[1] = X[0]; | ||||
\ | ||||
Y[1] = ADD(Y[0], _eight); | ||||
\ | ||||
if(numbytes == 256) | ||||
\ | ||||
Encrypt_##keysize(X, Y, ctx->rk, 16); | ||||
\ | ||||
else { | ||||
\ | ||||
X[2] = X[0]; | ||||
\ | ||||
Y[2] = ADD(Y[1], _eight); | ||||
\ | ||||
if(numbytes == 384) | ||||
\ | ||||
Encrypt_##keysize(X, Y, ctx->rk, 24); | ||||
\ | ||||
else { | ||||
\ | ||||
X[3] = X[0]; | ||||
\ | ||||
Y[3] = ADD(Y[2], _eight); | ||||
\ | ||||
Encrypt_##keysize(X, Y, ctx->rk, 32); | ||||
\ | ||||
} | ||||
\ | ||||
} | ||||
\ | ||||
} | ||||
\ | ||||
\ | ||||
nonce[0] += (numbytes >> 4); | ||||
\ | ||||
\ | ||||
XOR_STORE(in, out, X[0], Y[0]); | ||||
\ | ||||
if (numbytes >= 256) | ||||
\ | ||||
XOR_STORE(in + 128, out + 128, X[1], Y[1]); | ||||
\ | ||||
if(numbytes >= 384) | ||||
\ | ||||
XOR_STORE(in + 256, out + 256, X[2], Y[2]); | ||||
\ | ||||
if(numbytes >= 512) | ||||
\ | ||||
XOR_STORE(in + 384, out + 384, X[3], Y[3]); | ||||
\ | ||||
\ | ||||
return 0 | ||||
/*
 * Processes one chunk of `numbytes` bytes using the key size recorded in the
 * context (AVX512 variant).
 *
 * The whole body comes from the Encrypt_Dispatcher macro, instantiated once per
 * key size. The macro declares its own locals, advances nonce[0] by the number
 * of 16-byte blocks consumed and ends in `return 0` on every path, so no
 * statement follows the two branches.
 *
 * For numbytes == 16 the key stream block is written to `out` as is (`in` is
 * not read); for every other size `in` is XORed with the key stream into `out`.
 * The sizes handled explicitly are 16, 32, 64, 128, 256 and 384; any other
 * value is processed as a 512-byte chunk.
 */
static int speck_encrypt_xor(unsigned char *out, const unsigned char *in, u64 nonce[],
                             speck_context_t *ctx, int numbytes) {

    if(ctx->keysize != 256) {
        /* every key size other than 256 uses the 32-round schedule */
        Encrypt_Dispatcher(128);
    } else {
        /* 256 bit keys run the two additional rounds */
        Encrypt_Dispatcher(256);
    }
}
/*
 * SPECK in CTR mode (AVX512 variant): XORs `inlen` bytes of `in` with the key
 * stream and stores the result in `out`.
 *
 * out   - destination buffer, at least `inlen` bytes
 * in    - source buffer, `inlen` bytes
 * inlen - number of bytes to process; 0 returns immediately
 * n     - 16 bytes of nonce/counter, taken as two u64 words in host byte
 *         order; a local copy is advanced, the caller's buffer is not written
 * ctx   - context holding the expanded key
 *
 * Always returns 0.
 */
static int internal_speck_ctr(unsigned char *out, const unsigned char *in, unsigned long long inlen,
                              const unsigned char *n, speck_context_t *ctx) {

    unsigned int i;
    u64 nonce[2];
    /* u64-typed storage: for 16-byte requests speck_encrypt_xor stores the key
       stream block through a u64 pointer, so the buffer must be u64-aligned */
    u64 block64[2];
    unsigned char * const block = (unsigned char *)block64;
    unsigned char * const nonce_bytes = (unsigned char *)nonce;

    if(!inlen)
        return 0;

    /* byte-wise copy instead of a (u64 *) cast: `n` carries no alignment
       guarantee; the resulting words are the same host-order values */
    for(i = 0; i < sizeof(nonce); i++)
        nonce_bytes[i] = n[i];

    /* largest chunks first; every call advances the counter in nonce[0] */
    while(inlen >= 512) {
        speck_encrypt_xor(out, in, nonce, ctx, 512);
        in += 512; inlen -= 512; out += 512;
    }

    if(inlen >= 384) {
        speck_encrypt_xor(out, in, nonce, ctx, 384);
        in += 384; inlen -= 384; out += 384;
    }

    if(inlen >= 256) {
        speck_encrypt_xor(out, in, nonce, ctx, 256);
        in += 256; inlen -= 256; out += 256;
    }

    if(inlen >= 128) {
        speck_encrypt_xor(out, in, nonce, ctx, 128);
        in += 128; inlen -= 128; out += 128;
    }

    if(inlen >= 64) {
        speck_encrypt_xor(out, in, nonce, ctx, 64);
        in += 64; inlen -= 64; out += 64;
    }

    if(inlen >= 32) {
        speck_encrypt_xor(out, in, nonce, ctx, 32);
        in += 32; inlen -= 32; out += 32;
    }

    if(inlen >= 16) {
        /* a 16-byte request only yields the key stream block, the XOR with
           the input happens here */
        speck_encrypt_xor(block, in, nonce, ctx, 16);
        for(i = 0; i < 16; i++)
            out[i] = block[i] ^ in[i];
        in += 16; inlen -= 16; out += 16;
    }

    if(inlen > 0) {
        /* trailing partial block: use only the first `inlen` key stream bytes */
        speck_encrypt_xor(block, in, nonce, ctx, 16);
        for(i = 0; i < inlen; i++)
            out[i] = block[i] ^ in[i];
    }

    return 0;
}
/*
 * Expands the raw key `k` into the round keys stored in `ctx` (both the
 * broadcast vector form ctx->rk and the scalar form ctx->key) and records the
 * key size in ctx->keysize.
 *
 * ctx     - context to fill
 * k       - raw key, keysize / 8 bytes, taken as u64 words in host byte order
 * keysize - key size in bits; 128 and 256 are supported
 *
 * Returns 0 on success, -1 for an unsupported key size (ctx is left untouched
 * in that case).
 */
static int speck_expand_key (speck_context_t *ctx, const unsigned char *k, int keysize) {

    u64 K[4] = { 0 };
    unsigned char * const key_bytes = (unsigned char *)K;
    size_t key_len;
    size_t i;

    /* any other size would either overrun K or leave key words unset */
    if((keysize != 128) && (keysize != 256))
        return -1;

    /* byte-wise copy instead of a (u64 *) cast: `k` carries no alignment
       guarantee; the resulting words are the same host-order values */
    key_len = (size_t)keysize >> 3;
    for(i = 0; i < key_len; i++)
        key_bytes[i] = k[i];

    // a 128 bit key has only the two key words A and B, so B takes the place
    // of both C and D
    if(keysize == 128) {
        EK(K[0], K[1], K[1], K[1], ctx->rk, ctx->key);
    } else {
        EK(K[0], K[1], K[2], K[3], ctx->rk, ctx->key);
    }

    ctx->keysize = keysize;

    return 0;
}
#elif defined (__AVX2__) // AVX2 support -------------------------------------- | ||||
----------------------------------- | ||||
#define LCS(x,r) (((x)<<r)|((x)>>(64-r))) | #define LCS(x,r) (((x)<<r)|((x)>>(64-r))) | |||
#define RCS(x,r) (((x)>>r)|((x)<<(64-r))) | #define RCS(x,r) (((x)>>r)|((x)<<(64-r))) | |||
#define XOR _mm256_xor_si256 | #define XOR _mm256_xor_si256 | |||
#define AND _mm256_and_si256 | #define AND _mm256_and_si256 | |||
#define ADD _mm256_add_epi64 | #define ADD _mm256_add_epi64 | |||
#define SL _mm256_slli_epi64 | #define SL _mm256_slli_epi64 | |||
#define SR _mm256_srli_epi64 | #define SR _mm256_srli_epi64 | |||
skipping to change at line 45 | skipping to change at line 272 | |||
#define XOR_STORE_ALT(in,out,X,Y) (ST(out,XOR(LD(in),LOW(X,Y))), ST(out+32,XOR(L D(in+32),HIGH(X,Y)))) | #define XOR_STORE_ALT(in,out,X,Y) (ST(out,XOR(LD(in),LOW(X,Y))), ST(out+32,XOR(L D(in+32),HIGH(X,Y)))) | |||
#define SHFL _mm256_shuffle_epi8 | #define SHFL _mm256_shuffle_epi8 | |||
#define R8 SET(0x080f0e0d0c0b0a09LL,0x0007060504030201LL,0x080f0e0d0c0b0a09LL,0x 0007060504030201LL) | #define R8 SET(0x080f0e0d0c0b0a09LL,0x0007060504030201LL,0x080f0e0d0c0b0a09LL,0x 0007060504030201LL) | |||
#define L8 SET(0x0e0d0c0b0a09080fLL,0x0605040302010007LL,0x0e0d0c0b0a09080fLL,0x 0605040302010007LL) | #define L8 SET(0x0e0d0c0b0a09080fLL,0x0605040302010007LL,0x0e0d0c0b0a09080fLL,0x 0605040302010007LL) | |||
#define ROL8(X) (SHFL(X,L8)) | #define ROL8(X) (SHFL(X,L8)) | |||
#define ROR8(X) (SHFL(X,R8)) | #define ROR8(X) (SHFL(X,R8)) | |||
#define ROL(X,r) (XOR(SL(X,r),SR(X,(64-r)))) | #define ROL(X,r) (XOR(SL(X,r),SR(X,(64-r)))) | |||
#define ROR(X,r) (XOR(SR(X,r),SL(X,(64-r)))) | #define ROR(X,r) (XOR(SR(X,r),SL(X,(64-r)))) | |||
#define numrounds 34 | ||||
#define numkeywords 4 | ||||
#define R(X,Y,k) (X=XOR(ADD(ROR8(X),Y),k), Y=XOR(ROL(Y,3),X)) | #define R(X,Y,k) (X=XOR(ADD(ROR8(X),Y),k), Y=XOR(ROL(Y,3),X)) | |||
#define Rx4(X,Y,k) (R(X[0],Y[0],k)) | #define Rx4(X,Y,k) (R(X[0],Y[0],k)) | |||
#define Rx8(X,Y,k) (R(X[0],Y[0],k), R(X[1],Y[1],k)) | #define Rx8(X,Y,k) (R(X[0],Y[0],k), R(X[1],Y[1],k)) | |||
#define Rx12(X,Y,k) (R(X[0],Y[0],k), R(X[1],Y[1],k), R(X[2],Y[2],k)) | #define Rx12(X,Y,k) (R(X[0],Y[0],k), R(X[1],Y[1],k), R(X[2],Y[2],k)) | |||
#define Rx16(X,Y,k) (X[0]=ROR8(X[0]), X[0]=ADD(X[0],Y[0]), X[1]=ROR8(X[1]), | ||||
X[1]=ADD(X[1],Y[1]), \ | ||||
X[2]=ROR8(X[2]), X[2]=ADD(X[2],Y[2]), X[3]=ROR8(X[3]), | ||||
X[3]=ADD(X[3],Y[3]), \ | ||||
X[0]=XOR(X[0],k), X[1]=XOR(X[1],k), X[2]=XOR(X[2],k), | ||||
X[3]=XOR(X[3],k), \ | ||||
Z[0]=Y[0], Z[1]=Y[1], Z[2]=Y[2], | ||||
Z[3]=Y[3], \ | ||||
Z[0]=SL(Z[0],3), Y[0]=SR(Y[0],61), Z[1]=SL(Z[1],3), | ||||
Y[1]=SR(Y[1],61), \ | ||||
Z[2]=SL(Z[2],3), Y[2]=SR(Y[2],61), Z[3]=SL(Z[3],3), | ||||
Y[3]=SR(Y[3],61), \ | ||||
Y[0]=XOR(Y[0],Z[0]), Y[1]=XOR(Y[1],Z[1]), Y[2]=XOR(Y[2],Z[2 | ||||
]), Y[3]=XOR(Y[3],Z[3]), \ | ||||
Y[0]=XOR(X[0],Y[0]), Y[1]=XOR(X[1],Y[1]), Y[2]=XOR(X[2],Y[2 | ||||
]), Y[3]=XOR(X[3],Y[3])) | ||||
#define Rx16(X,Y,k) (X[0]=ROR8(X[0]), X[0]=ADD(X[0],Y[0]), X[1]=ROR8(X[1]), X[1] | #define Rx1(x,y,k) (x[0]=RCS(x[0],8), x[0]+=y[0], x[0]^=k, y[0]=LCS(y[0],3), y[ | |||
=ADD(X[1],Y[1]), \ | 0]^=x[0]) | |||
X[2]=ROR8(X[2]), X[2]=ADD(X[2],Y[2]), X[3]=ROR8(X[3]), X[3]= | ||||
ADD(X[3],Y[3]), \ | ||||
X[0]=XOR(X[0],k), X[1]=XOR(X[1],k), X[2]=XOR(X[2],k), X[3]=X | ||||
OR(X[3],k), \ | ||||
Z[0]=Y[0], Z[1]=Y[1], Z[2]=Y[2], Z[3]=Y[3], \ | ||||
Z[0]=SL(Z[0],3), Y[0]=SR(Y[0],61), Z[1]=SL(Z[1],3), Y[1]=SR | ||||
(Y[1],61), \ | ||||
Z[2]=SL(Z[2],3), Y[2]=SR(Y[2],61), Z[3]=SL(Z[3],3), Y[3]=SR | ||||
(Y[3],61), \ | ||||
Y[0]=XOR(Y[0],Z[0]), Y[1]=XOR(Y[1],Z[1]), Y[2]=XOR(Y[2],Z[2] | ||||
), Y[3]=XOR(Y[3],Z[3]), \ | ||||
Y[0]=XOR(X[0],Y[0]), Y[1]=XOR(X[1],Y[1]), Y[2]=XOR(X[2],Y[2] | ||||
), Y[3]=XOR(X[3],Y[3])) | ||||
#define Rx2(x,y,k) (x[0]=RCS(x[0],8), x[1]=RCS(x[1],8), x[0]+=y[0], x[1]+=y[1], | ||||
\ | ||||
x[0]^=k, x[1]^=k, y[0]=LCS(y[0],3), y[1]=LCS(y[1],3), y[0]^= | ||||
x[0], y[1]^=x[1]) | ||||
#define Rx1(x,y,k) (x[0]=RCS(x[0],8), x[0]+=y[0], x[0]^=k, y[0]=LCS(y[0],3), y[0 | ||||
]^=x[0]) | ||||
#define Rx1b(x,y,k) (x=RCS(x,8), x+=y, x^=k, y=LCS(y,3), y^=x) | #define Rx1b(x,y,k) (x=RCS(x,8), x+=y, x^=k, y=LCS(y,3), y^=x) | |||
#define Rx2(x,y,k) (x[0]=RCS(x[0],8), x[1]=RCS(x[1],8), x[0]+=y[0], x[1]+ | ||||
=y[1], \ | ||||
x[0]^=k, x[1]^=k, y[0]=LCS(y[0],3), y[1]= | ||||
LCS(y[1],3), y[0]^=x[0], y[1]^=x[1]) | ||||
#define Encrypt(X,Y,k,n) (Rx##n(X,Y,k[0]), Rx##n(X,Y,k[1]), Rx##n(X,Y,k[2]), | #define Encrypt_128(X,Y,k,n) (Rx##n(X,Y,k[0]), Rx##n(X,Y,k[1]), Rx##n(X,Y,k[2] | |||
Rx##n(X,Y,k[3]), Rx##n(X,Y,k[4]), Rx##n(X,Y,k[5]), Rx##n(X,Y,k[6]), Rx##n(X, | ), Rx##n(X,Y,k[3]), Rx##n(X,Y,k[4]), Rx##n(X,Y,k[5]), Rx##n(X,Y,k[6]), Rx## | |||
Y,k[7]), \ | n(X,Y,k[7]), \ | |||
Rx##n(X,Y,k[8]), Rx##n(X,Y,k[9]), Rx##n(X,Y,k[10]), R | Rx##n(X,Y,k[8]), Rx##n(X,Y,k[9]), Rx##n(X,Y,k[10 | |||
x##n(X,Y,k[11]), Rx##n(X,Y,k[12]), Rx##n(X,Y,k[13]), Rx##n(X,Y,k[14]), Rx##n(X,Y | ]), Rx##n(X,Y,k[11]), Rx##n(X,Y,k[12]), Rx##n(X,Y,k[13]), Rx##n(X,Y,k[14]), Rx## | |||
,k[15]), \ | n(X,Y,k[15]), \ | |||
Rx##n(X,Y,k[16]), Rx##n(X,Y,k[17]), Rx##n(X,Y,k[18]), R | Rx##n(X,Y,k[16]), Rx##n(X,Y,k[17]), Rx##n(X,Y,k[18 | |||
x##n(X,Y,k[19]), Rx##n(X,Y,k[20]), Rx##n(X,Y,k[21]), Rx##n(X,Y,k[22]), Rx##n(X,Y | ]), Rx##n(X,Y,k[19]), Rx##n(X,Y,k[20]), Rx##n(X,Y,k[21]), Rx##n(X,Y,k[22]), Rx## | |||
,k[23]), \ | n(X,Y,k[23]), \ | |||
Rx##n(X,Y,k[24]), Rx##n(X,Y,k[25]), Rx##n(X,Y,k[26]), R | Rx##n(X,Y,k[24]), Rx##n(X,Y,k[25]), Rx##n(X,Y,k[26 | |||
x##n(X,Y,k[27]), Rx##n(X,Y,k[28]), Rx##n(X,Y,k[29]), Rx##n(X,Y,k[30]), Rx##n(X,Y | ]), Rx##n(X,Y,k[27]), Rx##n(X,Y,k[28]), Rx##n(X,Y,k[29]), Rx##n(X,Y,k[30]), Rx## | |||
,k[31]), \ | n(X,Y,k[31])) | |||
Rx##n(X,Y,k[32]), Rx##n(X,Y,k[33])) | ||||
#define RK(X,Y,k,key,i) (SET1(k[i],Y), key[i]=Y, X=RCS(X,8), X+=Y, X^=i, Y=LCS | ||||
(Y,3), Y^=X) | ||||
#define EK(A,B,C,D,k,key) (RK(B,A,k,key,0), RK(C,A,k,key,1), RK(D,A,k,key,2), | ||||
RK(B,A,k,key,3), RK(C,A,k,key,4), RK(D,A,k,key,5), RK(B,A,k,key,6), \ | ||||
RK(C,A,k,key,7), RK(D,A,k,key,8), RK(B,A,k,key,9), | ||||
RK(C,A,k,key,10), RK(D,A,k,key,11), RK(B,A,k,key,12), RK(C,A,k,key,13), \ | ||||
RK(D,A,k,key,14), RK(B,A,k,key,15), RK(C,A,k,key,16), | ||||
RK(D,A,k,key,17), RK(B,A,k,key,18), RK(C,A,k,key,19), RK(D,A,k,key,20), \ | ||||
RK(B,A,k,key,21), RK(C,A,k,key,22), RK(D,A,k,key,23), | ||||
RK(B,A,k,key,24), RK(C,A,k,key,25), RK(D,A,k,key,26), RK(B,A,k,key,27), \ | ||||
RK(C,A,k,key,28), RK(D,A,k,key,29), RK(B,A,k,key,30), | ||||
RK(C,A,k,key,31), RK(D,A,k,key,32), RK(B,A,k,key,33)) | ||||
static int speck_encrypt_xor(unsigned char *out, const unsigned char *in, u64 no | #define Encrypt_256(X,Y,k,n) (Encrypt_128(X,Y,k,n), \ | |||
nce[], speck_context_t *ctx, int numbytes) { | Rx##n(X,Y,k[32]), Rx##n(X,Y,k[33])) | |||
u64 x[2], y[2]; | #define RK(X,Y,k,key,i) (SET1(k[i],Y), key[i]=Y, X=RCS(X,8), X+=Y, X^=i, Y=LCS | |||
u256 X[4], Y[4], Z[4]; | (Y,3), Y^=X) | |||
if (numbytes == 16) { | #define EK(A,B,C,D,k,key) (RK(B,A,k,key,0), RK(C,A,k,key,1), RK(D,A,k,key,2), | |||
x[0] = nonce[1]; y[0] = nonce[0]; nonce[0]++; | RK(B,A,k,key,3), RK(C,A,k,key,4), RK(D,A,k,key,5), RK(B,A,k,key,6), \ | |||
Encrypt (x, y, ctx->key, 1); | RK(C,A,k,key,7), RK(D,A,k,key,8), RK(B,A,k,key,9), | |||
((u64 *)out)[1] = x[0]; ((u64 *)out)[0] = y[0]; | RK(C,A,k,key,10), RK(D,A,k,key,11), RK(B,A,k,key,12), RK(C,A,k,key,13), \ | |||
return 0; | RK(D,A,k,key,14), RK(B,A,k,key,15), RK(C,A,k,key,16), | |||
} | RK(D,A,k,key,17), RK(B,A,k,key,18), RK(C,A,k,key,19), RK(D,A,k,key,20), \ | |||
RK(B,A,k,key,21), RK(C,A,k,key,22), RK(D,A,k,key,23), | ||||
RK(B,A,k,key,24), RK(C,A,k,key,25), RK(D,A,k,key,26), RK(B,A,k,key,27), \ | ||||
RK(C,A,k,key,28), RK(D,A,k,key,29), RK(B,A,k,key,30), | ||||
RK(C,A,k,key,31), RK(D,A,k,key,32), RK(B,A,k,key,33)) | ||||
if (numbytes == 32) { | #define Encrypt_Dispatcher(keysize) | |||
x[0] = nonce[1]; y[0] = nonce[0]; nonce[0]++; | \ | |||
x[1] = nonce[1]; y[1] = nonce[0]; nonce[0]++; | u64 x[2], y[2]; | |||
Encrypt (x , y, ctx->key, 2); | \ | |||
((u64 *)out)[1] = x[0] ^ ((u64 *)in)[1]; ((u64 *)out)[0] = y[0] ^ ((u64 *)in | u256 X[4], Y[4], Z[4]; | |||
)[0]; | \ | |||
((u64 *)out)[3] = x[1] ^ ((u64 *)in)[3]; ((u64 *)out)[2] = y[1] ^ ((u64 *)in | ||||
)[2]; | \ | |||
return 0; | if(numbytes == 16) { | |||
} | \ | |||
x[0] = nonce[1]; y[0] = nonce[0]; nonce[0]++; | ||||
\ | ||||
Encrypt_##keysize(x, y, ctx->key, 1); | ||||
\ | ||||
((u64 *)out)[1] = x[0]; ((u64 *)out)[0] = y[0]; | ||||
\ | ||||
return 0; | ||||
\ | ||||
} | ||||
\ | ||||
\ | ||||
if(numbytes == 32) { | ||||
\ | ||||
x[0] = nonce[1]; y[0] = nonce[0]; nonce[0]++; | ||||
\ | ||||
x[1] = nonce[1]; y[1] = nonce[0]; nonce[0]++; | ||||
\ | ||||
Encrypt_##keysize(x , y, ctx->key, 2); | ||||
\ | ||||
((u64 *)out)[1] = x[0] ^ ((u64 *)in)[1]; ((u64 *)out)[0] = y[0] ^ ((u64 | ||||
*)in)[0]; \ | ||||
((u64 *)out)[3] = x[1] ^ ((u64 *)in)[3]; ((u64 *)out)[2] = y[1] ^ ((u64 | ||||
*)in)[2]; \ | ||||
return 0; | ||||
\ | ||||
} | ||||
\ | ||||
\ | ||||
SET1(X[0], nonce[1]); SET4(Y[0], nonce[0]); | ||||
\ | ||||
\ | ||||
if(numbytes == 64) | ||||
\ | ||||
Encrypt_##keysize(X, Y, ctx->rk, 4); | ||||
\ | ||||
else { | ||||
\ | ||||
X[1] = X[0]; | ||||
\ | ||||
Y[1] = ADD(Y[0], _four); | ||||
\ | ||||
if(numbytes == 128) | ||||
\ | ||||
Encrypt_##keysize(X, Y, ctx->rk, 8); | ||||
\ | ||||
else { | ||||
\ | ||||
X[2] = X[0]; | ||||
\ | ||||
Y[2] = ADD(Y[1], _four); | ||||
\ | ||||
if(numbytes == 192) | ||||
\ | ||||
Encrypt_##keysize(X, Y, ctx->rk, 12); | ||||
\ | ||||
else { | ||||
\ | ||||
X[3] = X[0]; | ||||
\ | ||||
Y[3] = ADD(Y[2], _four); | ||||
\ | ||||
Encrypt_##keysize(X, Y, ctx->rk, 16); | ||||
\ | ||||
} | ||||
\ | ||||
} | ||||
\ | ||||
} | ||||
\ | ||||
\ | ||||
nonce[0] += (numbytes >> 4); | ||||
\ | ||||
\ | ||||
XOR_STORE(in, out, X[0], Y[0]); | ||||
\ | ||||
if (numbytes >= 128) | ||||
\ | ||||
XOR_STORE(in + 64, out + 64, X[1], Y[1]); | ||||
\ | ||||
if(numbytes >= 192) | ||||
\ | ||||
XOR_STORE(in + 128, out + 128, X[2], Y[2]); | ||||
\ | ||||
if(numbytes >= 256) | ||||
\ | ||||
XOR_STORE(in + 192, out + 192, X[3], Y[3]); | ||||
\ | ||||
\ | ||||
return 0 | ||||
SET1 (X[0], nonce[1]); SET4 (Y[0], nonce[0]); | static int speck_encrypt_xor(unsigned char *out, const unsigned char *in, u64 no nce[], speck_context_t *ctx, int numbytes) { | |||
if (numbytes == 64) | if(ctx->keysize == 256) { | |||
Encrypt (X, Y, ctx->rk, 4); | Encrypt_Dispatcher(256); | |||
else { | } else { | |||
X[1] = X[0]; | Encrypt_Dispatcher(128); | |||
Y[1] = ADD (Y[0], _four); | ||||
if (numbytes == 128) | ||||
Encrypt (X, Y, ctx->rk, 8); | ||||
else { | ||||
X[2] = X[0]; | ||||
Y[2] = ADD (Y[1], _four); | ||||
if (numbytes == 192) | ||||
Encrypt (X, Y, ctx->rk, 12); | ||||
else { | ||||
X[3] = X[0]; | ||||
Y[3] = ADD (Y[2], _four); | ||||
Encrypt (X, Y, ctx->rk, 16); | ||||
} | ||||
} | } | |||
} | ||||
nonce[0] += (numbytes>>4); | ||||
XOR_STORE (in, out, X[0], Y[0]); | ||||
if (numbytes >= 128) | ||||
XOR_STORE (in + 64, out + 64, X[1], Y[1]); | ||||
if (numbytes >= 192) | ||||
XOR_STORE (in + 128, out + 128, X[2], Y[2]); | ||||
if (numbytes >= 256) | ||||
XOR_STORE (in + 192, out + 192, X[3], Y[3]); | ||||
return 0; | ||||
} | } | |||
int speck_ctr( unsigned char *out, const unsigned char *in, unsigned long long i | static int internal_speck_ctr(unsigned char *out, const unsigned char *in, unsig | |||
nlen, | ned long long inlen, | |||
const unsigned char *n, speck_context_t *ctx) { | const unsigned char *n, speck_context_t *ctx) { | |||
int i; | ||||
u64 nonce[2]; | ||||
unsigned char block[16]; | ||||
u64 * const block64 = (u64 *)block; | ||||
if (!inlen) | int i; | |||
return 0; | u64 nonce[2]; | |||
unsigned char block[16]; | ||||
u64 * const block64 = (u64 *)block; | ||||
if (!inlen) | ||||
return 0; | ||||
nonce[0] = ((u64 *)n)[0]; | ||||
nonce[1] = ((u64 *)n)[1]; | ||||
while(inlen >= 256) { | ||||
speck_encrypt_xor(out, in, nonce, ctx, 256); | ||||
in += 256; inlen -= 256; out += 256; | ||||
} | ||||
nonce[0] = ((u64 *)n)[0]; | if(inlen >= 192) { | |||
nonce[1] = ((u64 *)n)[1]; | speck_encrypt_xor(out, in, nonce, ctx, 192); | |||
in += 192; inlen -= 192; out += 192; | ||||
} | ||||
while (inlen >= 256) { | if(inlen >= 128) { | |||
speck_encrypt_xor (out, in, nonce, ctx, 256); | speck_encrypt_xor(out, in, nonce, ctx, 128); | |||
in += 256; inlen -= 256; out += 256; | in += 128; inlen -= 128; out += 128; | |||
} | } | |||
if (inlen >= 192) { | if(inlen >= 64) { | |||
speck_encrypt_xor (out, in, nonce, ctx, 192); | speck_encrypt_xor(out, in, nonce, ctx, 64); | |||
in += 192; inlen -= 192; out += 192; | in += 64; inlen -= 64; out += 64; | |||
} | } | |||
if (inlen >= 128) { | if(inlen >= 32) { | |||
speck_encrypt_xor (out, in, nonce, ctx, 128); | speck_encrypt_xor(out, in, nonce, ctx, 32); | |||
in += 128; inlen -= 128; out += 128; | in += 32; inlen -= 32; out += 32; | |||
} | } | |||
if (inlen >= 64) { | if(inlen >= 16) { | |||
speck_encrypt_xor (out, in, nonce, ctx, 64); | speck_encrypt_xor(block, in, nonce, ctx, 16); | |||
in += 64; inlen -= 64; out += 64; | ((u64 *)out)[0] = block64[0] ^ ((u64 *)in)[0]; | |||
} | ((u64 *)out)[1] = block64[1] ^ ((u64 *)in)[1]; | |||
in += 16; inlen -= 16; out += 16; | ||||
} | ||||
if (inlen >= 32) { | if(inlen > 0) { | |||
speck_encrypt_xor (out, in, nonce, ctx, 32); | speck_encrypt_xor(block, in, nonce, ctx, 16); | |||
in += 32; inlen -= 32; out += 32; | for(i = 0; i < inlen; i++) | |||
} | out[i] = block[i] ^ in[i]; | |||
} | ||||
if (inlen >= 16) { | return 0; | |||
speck_encrypt_xor (block, in, nonce, ctx, 16); | } | |||
((u64 *)out)[0] = block64[0] ^ ((u64 *)in)[0]; | ||||
((u64 *)out)[1] = block64[1] ^ ((u64 *)in)[1]; | ||||
in += 16; inlen -= 16; out += 16; | ||||
} | ||||
if (inlen > 0) { | static int speck_expand_key (speck_context_t *ctx, const unsigned char *k, int k | |||
speck_encrypt_xor (block, in, nonce, ctx, 16); | eysize) { | |||
for (i = 0; i < inlen; i++) | ||||
out[i] = block[i] ^ in[i]; | ||||
} | ||||
return 0; | u64 K[4]; | |||
} | size_t i; | |||
int speck_expand_key (const unsigned char *k, speck_context_t *ctx) { | for(i = 0; i < (keysize >> 6); i++) | |||
K[i] = ((u64 *)k)[i]; | ||||
u64 K[4]; | // 128 bit has only two keys A and B thus replacing both C and D with B then | |||
size_t i; | if(keysize == 128) { | |||
for (i = 0; i < numkeywords; i++) | EK(K[0], K[1], K[1], K[1], ctx->rk, ctx->key); | |||
K[i] = ((u64 *)k)[i]; | } else { | |||
EK(K[0], K[1], K[2], K[3], ctx->rk, ctx->key); | ||||
} | ||||
EK (K[0], K[1], K[2], K[3], ctx->rk, ctx->key); | ctx->keysize = keysize; | |||
return 0; | return 0; | |||
} | } | |||
#elif defined (__SSE4_2__) // SSE support -------------------------------------- ----------- | #elif defined (__SSE2__) // SSE support ---------------------------------------- ----------------------------------- | |||
#define LCS(x,r) (((x)<<r)|((x)>>(64-r))) | #define LCS(x,r) (((x)<<r)|((x)>>(64-r))) | |||
#define RCS(x,r) (((x)>>r)|((x)<<(64-r))) | #define RCS(x,r) (((x)>>r)|((x)<<(64-r))) | |||
#define XOR _mm_xor_si128 | #define XOR _mm_xor_si128 | |||
#define AND _mm_and_si128 | #define AND _mm_and_si128 | |||
#define ADD _mm_add_epi64 | #define ADD _mm_add_epi64 | |||
#define SL _mm_slli_epi64 | #define SL _mm_slli_epi64 | |||
#define SR _mm_srli_epi64 | #define SR _mm_srli_epi64 | |||
skipping to change at line 234 | skipping to change at line 472 | |||
#define LOW _mm_unpacklo_epi64 | #define LOW _mm_unpacklo_epi64 | |||
#define HIGH _mm_unpackhi_epi64 | #define HIGH _mm_unpackhi_epi64 | |||
#define LD(ip) _mm_loadu_si128((__m128i *)(ip)) | #define LD(ip) _mm_loadu_si128((__m128i *)(ip)) | |||
#define ST(ip,X) _mm_storeu_si128((__m128i *)(ip),X) | #define ST(ip,X) _mm_storeu_si128((__m128i *)(ip),X) | |||
#define STORE(out,X,Y) (ST(out,LOW(Y,X)), ST(out+16,HIGH(Y,X))) | #define STORE(out,X,Y) (ST(out,LOW(Y,X)), ST(out+16,HIGH(Y,X))) | |||
#define STORE_ALT(out,X,Y) (ST(out,LOW(X,Y)), ST(out+16,HIGH(X,Y))) | #define STORE_ALT(out,X,Y) (ST(out,LOW(X,Y)), ST(out+16,HIGH(X,Y))) | |||
#define XOR_STORE(in,out,X,Y) (ST(out,XOR(LD(in),LOW(Y,X))), ST(out+16,XOR(LD(in +16),HIGH(Y,X)))) | #define XOR_STORE(in,out,X,Y) (ST(out,XOR(LD(in),LOW(Y,X))), ST(out+16,XOR(LD(in +16),HIGH(Y,X)))) | |||
#define XOR_STORE_ALT(in,out,X,Y) (ST(out,XOR(LD(in),LOW(X,Y))), ST(out+16,XOR(L D(in+16),HIGH(X,Y)))) | #define XOR_STORE_ALT(in,out,X,Y) (ST(out,XOR(LD(in),LOW(X,Y))), ST(out+16,XOR(L D(in+16),HIGH(X,Y)))) | |||
#define ROL(X,r) (XOR(SL(X,r),SR(X,(64-r)))) | ||||
#define ROR(X,r) (XOR(SR(X,r),SL(X,(64-r)))) | ||||
#if defined (__SSSE3__) // even SSSE3 ------------------------------- | ||||
#define SHFL _mm_shuffle_epi8 | #define SHFL _mm_shuffle_epi8 | |||
#define R8 _mm_set_epi64x(0x080f0e0d0c0b0a09LL,0x0007060504030201LL) | #define R8 _mm_set_epi64x(0x080f0e0d0c0b0a09LL,0x0007060504030201LL) | |||
#define L8 _mm_set_epi64x(0x0e0d0c0b0a09080fLL,0x0605040302010007LL) | #define L8 _mm_set_epi64x(0x0e0d0c0b0a09080fLL,0x0605040302010007LL) | |||
#define ROL8(X) (SHFL(X,L8)) | #define ROL8(X) (SHFL(X,L8)) | |||
#define ROR8(X) (SHFL(X,R8)) | #define ROR8(X) (SHFL(X,R8)) | |||
#define ROL(X,r) (XOR(SL(X,r),SR(X,(64-r)))) | #else // regular SSE2 ------------------------------------------------ | |||
#define ROR(X,r) (XOR(SR(X,r),SL(X,(64-r)))) | #define ROL8(X) (ROL(X,8)) | |||
#define ROR8(X) (ROR(X,8)) | ||||
#define numrounds 34 | #endif // SSSE3 vs. SSE2 ---------------------------------------------- | |||
#define numkeywords 4 | ||||
#define R(X,Y,k) (X=XOR(ADD(ROR8(X),Y),k), Y=XOR(ROL(Y,3),X)) | #define R(X,Y,k) (X=XOR(ADD(ROR8(X),Y),k), Y=XOR(ROL(Y,3),X)) | |||
#define Rx2(X,Y,k) (R(X[0],Y[0],k)) | #define Rx2(X,Y,k) (R(X[0],Y[0],k)) | |||
#define Rx4(X,Y,k) (R(X[0],Y[0],k), R(X[1],Y[1],k)) | #define Rx4(X,Y,k) (R(X[0],Y[0],k), R(X[1],Y[1],k)) | |||
#define Rx6(X,Y,k) (R(X[0],Y[0],k), R(X[1],Y[1],k), R(X[2],Y[2],k)) | #define Rx6(X,Y,k) (R(X[0],Y[0],k), R(X[1],Y[1],k), R(X[2],Y[2],k)) | |||
#define Rx8(X,Y,k) (X[0]=ROR8(X[0]), X[0]=ADD(X[0],Y[0]), X[1]=ROR8(X[1]), | ||||
#define Rx8(X,Y,k) (X[0]=ROR8(X[0]), X[0]=ADD(X[0],Y[0]), X[1]=ROR8(X[1]), X[1]= | X[1]=ADD(X[1],Y[1]), \ | |||
ADD(X[1],Y[1]), \ | X[2]=ROR8(X[2]), X[2]=ADD(X[2],Y[2]), X[3]=ROR8(X[3]), | |||
X[2]=ROR8(X[2]), X[2]=ADD(X[2],Y[2]), X[3]=ROR8(X[3]), X[3]= | X[3]=ADD(X[3],Y[3]), \ | |||
ADD(X[3],Y[3]), \ | X[0]=XOR(X[0],k), X[1]=XOR(X[1],k), X[2]=XOR(X[2],k), | |||
X[0]=XOR(X[0],k), X[1]=XOR(X[1],k), X[2]=XOR(X[2],k), X[3]=X | X[3]=XOR(X[3],k), \ | |||
OR(X[3],k), \ | Z[0]=Y[0], Z[1]=Y[1], Z[2]=Y[2], | |||
Z[0]=Y[0], Z[1]=Y[1], Z[2]=Y[2], Z[3]=Y[3], \ | Z[3]=Y[3], \ | |||
Z[0]=SL(Z[0],3), Y[0]=SR(Y[0],61), Z[1]=SL(Z[1],3), Y[1]=SR | Z[0]=SL(Z[0],3), Y[0]=SR(Y[0],61), Z[1]=SL(Z[1],3), | |||
(Y[1],61), \ | Y[1]=SR(Y[1],61), \ | |||
Z[2]=SL(Z[2],3), Y[2]=SR(Y[2],61), Z[3]=SL(Z[3],3), Y[3]=SR | Z[2]=SL(Z[2],3), Y[2]=SR(Y[2],61), Z[3]=SL(Z[3],3), | |||
(Y[3],61), \ | Y[3]=SR(Y[3],61), \ | |||
Y[0]=XOR(Y[0],Z[0]), Y[1]=XOR(Y[1],Z[1]), Y[2]=XOR(Y[2],Z[2] ), Y[3]=XOR(Y[3],Z[3]), \ | Y[0]=XOR(Y[0],Z[0]), Y[1]=XOR(Y[1],Z[1]), Y[2]=XOR(Y[2],Z[2] ), Y[3]=XOR(Y[3],Z[3]), \ | |||
Y[0]=XOR(X[0],Y[0]), Y[1]=XOR(X[1],Y[1]), Y[2]=XOR(X[2],Y[2] ), Y[3]=XOR(X[3],Y[3])) | Y[0]=XOR(X[0],Y[0]), Y[1]=XOR(X[1],Y[1]), Y[2]=XOR(X[2],Y[2] ), Y[3]=XOR(X[3],Y[3])) | |||
#define Rx1(x,y,k) (x[0]=RCS(x[0],8), x[0]+=y[0], x[0]^=k, y[0]=LCS(y[0],3), y[0 ]^=x[0]) | #define Rx1(x,y,k) (x[0]=RCS(x[0],8), x[0]+=y[0], x[0]^=k, y[0]=LCS(y[0],3), y[0 ]^=x[0]) | |||
#define Rx1b(x,y,k) (x=RCS(x,8), x+=y, x^=k, y=LCS(y,3), y^=x) | #define Rx1b(x,y,k) (x=RCS(x,8), x+=y, x^=k, y=LCS(y,3), y^=x) | |||
#define Encrypt(X,Y,k,n) (Rx##n(X,Y,k[0]), Rx##n(X,Y,k[1]), Rx##n(X,Y,k[2]), | #define Encrypt_128(X,Y,k,n) (Rx##n(X,Y,k[0]), Rx##n(X,Y,k[1]), Rx##n(X,Y,k[2] | |||
Rx##n(X,Y,k[3]), Rx##n(X,Y,k[4]), Rx##n(X,Y,k[5]), Rx##n(X,Y,k[6]), Rx##n(X, | ), Rx##n(X,Y,k[3]), Rx##n(X,Y,k[4]), Rx##n(X,Y,k[5]), Rx##n(X,Y,k[6]), Rx## | |||
Y,k[7]), \ | n(X,Y,k[7]), \ | |||
Rx##n(X,Y,k[8]), Rx##n(X,Y,k[9]), Rx##n(X,Y,k[10]), | Rx##n(X,Y,k[8]), Rx##n(X,Y,k[9]), Rx##n(X,Y,k[10 | |||
Rx##n(X,Y,k[11]), Rx##n(X,Y,k[12]), Rx##n(X,Y,k[13]), Rx##n(X,Y,k[14]), Rx##n(X, | ]), Rx##n(X,Y,k[11]), Rx##n(X,Y,k[12]), Rx##n(X,Y,k[13]), Rx##n(X,Y,k[14]), Rx## | |||
Y,k[15]), \ | n(X,Y,k[15]), \ | |||
Rx##n(X,Y,k[16]), Rx##n(X,Y,k[17]), Rx##n(X,Y,k[18]), | Rx##n(X,Y,k[16]), Rx##n(X,Y,k[17]), Rx##n(X,Y,k[18 | |||
Rx##n(X,Y,k[19]), Rx##n(X,Y,k[20]), Rx##n(X,Y,k[21]), Rx##n(X,Y,k[22]), Rx##n(X, | ]), Rx##n(X,Y,k[19]), Rx##n(X,Y,k[20]), Rx##n(X,Y,k[21]), Rx##n(X,Y,k[22]), Rx## | |||
Y,k[23]), \ | n(X,Y,k[23]), \ | |||
Rx##n(X,Y,k[24]), Rx##n(X,Y,k[25]), Rx##n(X,Y,k[26]), | Rx##n(X,Y,k[24]), Rx##n(X,Y,k[25]), Rx##n(X,Y,k[26 | |||
Rx##n(X,Y,k[27]), Rx##n(X,Y,k[28]), Rx##n(X,Y,k[29]), Rx##n(X,Y,k[30]), Rx##n(X, | ]), Rx##n(X,Y,k[27]), Rx##n(X,Y,k[28]), Rx##n(X,Y,k[29]), Rx##n(X,Y,k[30]), Rx## | |||
Y,k[31]), \ | n(X,Y,k[31])) | |||
Rx##n(X,Y,k[32]), Rx##n(X,Y,k[33])) | ||||
#define Encrypt_256(X,Y,k,n) (Encrypt_128(X,Y,k,n), \ | ||||
Rx##n(X,Y,k[32]), Rx##n(X,Y,k[33])) | ||||
#define RK(X,Y,k,key,i) (SET1(k[i],Y), key[i]=Y, X=RCS(X,8), X+=Y, X^=i, Y=LCS (Y,3), Y^=X) | #define RK(X,Y,k,key,i) (SET1(k[i],Y), key[i]=Y, X=RCS(X,8), X+=Y, X^=i, Y=LCS (Y,3), Y^=X) | |||
#define EK(A,B,C,D,k,key) (RK(B,A,k,key,0), RK(C,A,k,key,1), RK(D,A,k,key,2), RK(B,A,k,key,3), RK(C,A,k,key,4), RK(D,A,k,key,5), RK(B,A,k,key,6), \ | #define EK(A,B,C,D,k,key) (RK(B,A,k,key,0), RK(C,A,k,key,1), RK(D,A,k,key,2), RK(B,A,k,key,3), RK(C,A,k,key,4), RK(D,A,k,key,5), RK(B,A,k,key,6), \ | |||
RK(C,A,k,key,7), RK(D,A,k,key,8), RK(B,A,k,key,9), RK(C,A,k,key,10), RK(D,A,k,key,11), RK(B,A,k,key,12), RK(C,A,k,key,13), \ | RK(C,A,k,key,7), RK(D,A,k,key,8), RK(B,A,k,key,9), RK(C,A,k,key,10), RK(D,A,k,key,11), RK(B,A,k,key,12), RK(C,A,k,key,13), \ | |||
RK(D,A,k,key,14), RK(B,A,k,key,15), RK(C,A,k,key,16), RK(D,A,k,key,17), RK(B,A,k,key,18), RK(C,A,k,key,19), RK(D,A,k,key,20), \ | RK(D,A,k,key,14), RK(B,A,k,key,15), RK(C,A,k,key,16), RK(D,A,k,key,17), RK(B,A,k,key,18), RK(C,A,k,key,19), RK(D,A,k,key,20), \ | |||
RK(B,A,k,key,21), RK(C,A,k,key,22), RK(D,A,k,key,23), RK(B,A,k,key,24), RK(C,A,k,key,25), RK(D,A,k,key,26), RK(B,A,k,key,27), \ | RK(B,A,k,key,21), RK(C,A,k,key,22), RK(D,A,k,key,23), RK(B,A,k,key,24), RK(C,A,k,key,25), RK(D,A,k,key,26), RK(B,A,k,key,27), \ | |||
RK(C,A,k,key,28), RK(D,A,k,key,29), RK(B,A,k,key,30), RK(C,A,k,key,31), RK(D,A,k,key,32), RK(B,A,k,key,33)) | RK(C,A,k,key,28), RK(D,A,k,key,29), RK(B,A,k,key,30), RK(C,A,k,key,31), RK(D,A,k,key,32), RK(B,A,k,key,33)) | |||
static int speck_encrypt_xor (unsigned char *out, const unsigned char *in, u64 n | #define Encrypt_Dispatcher(keysize) \ | |||
once[], const speck_context_t ctx, int numbytes) { | u64 x[2], y[2]; \ | |||
u128 X[4], Y[4], Z[4]; \ | ||||
\ | ||||
if(numbytes == 16) { \ | ||||
x[0] = nonce[1]; y[0] = nonce[0]; nonce[0]++; \ | ||||
Encrypt_##keysize(x, y, ctx.key, 1); \ | ||||
((u64 *)out)[1] = x[0]; ((u64 *)out)[0] = y[0]; \ | ||||
return 0; \ | ||||
} \ | ||||
\ | ||||
SET1(X[0], nonce[1]); SET2(Y[0], nonce[0]); \ | ||||
\ | ||||
if(numbytes == 32) \ | ||||
Encrypt_##keysize(X, Y, ctx.rk, 2); \ | ||||
else { \ | ||||
X[1] = X[0]; Y[1] = ADD(Y[0], _two); \ | ||||
if(numbytes == 64) \ | ||||
Encrypt_##keysize(X, Y, ctx.rk, 4); \ | ||||
else { \ | ||||
X[2] = X[0]; Y[2] = ADD(Y[1], _two); \ | ||||
if(numbytes == 96) \ | ||||
Encrypt_##keysize(X, Y, ctx.rk, 6); \ | ||||
else { \ | ||||
X[3] = X[0]; Y[3] = ADD(Y[2], _two); \ | ||||
Encrypt_##keysize(X, Y, ctx.rk, 8); \ | ||||
} \ | ||||
} \ | ||||
} \ | ||||
\ | ||||
nonce[0] += (numbytes >> 4); \ | ||||
\ | ||||
XOR_STORE(in, out, X[0], Y[0]); \ | ||||
if(numbytes >= 64) \ | ||||
XOR_STORE(in + 32, out + 32, X[1], Y[1]); \ | ||||
if(numbytes >= 96) \ | ||||
XOR_STORE(in + 64, out + 64, X[2], Y[2]); \ | ||||
if(numbytes >= 128) \ | ||||
XOR_STORE(in + 96, out + 96, X[3], Y[3]); \ | ||||
\ | ||||
return 0 | ||||
u64 x[2], y[2]; | // attention: ctx is provided by value as it is faster in this case, astonishing | |||
u128 X[4], Y[4], Z[4]; | ly | |||
static int speck_encrypt_xor (unsigned char *out, const unsigned char *in, u64 n | ||||
if (numbytes == 16) { | once[], const speck_context_t ctx, int numbytes) { | |||
x[0] = nonce[1]; y[0] = nonce[0]; nonce[0]++; | ||||
Encrypt (x, y, ctx.key, 1); | ||||
((u64 *)out)[1] = x[0]; ((u64 *)out)[0] = y[0]; | ||||
return 0; | ||||
} | ||||
SET1 (X[0], nonce[1]); SET2 (Y[0], nonce[0]); | ||||
if (numbytes == 32) | ||||
Encrypt (X, Y, ctx.rk, 2); | ||||
else { | ||||
X[1] = X[0]; Y[1] = ADD (Y[0], _two); | ||||
if (numbytes == 64) | ||||
Encrypt (X, Y, ctx.rk, 4); | ||||
else { | ||||
X[2] = X[0]; Y[2] = ADD (Y[1], _two); | ||||
if (numbytes == 96) | ||||
Encrypt (X, Y, ctx.rk, 6); | ||||
else { | ||||
X[3] = X[0]; Y[3] = ADD (Y[2], _two); | ||||
Encrypt (X, Y, ctx.rk, 8); | ||||
} | ||||
} | ||||
} | ||||
nonce[0] += (numbytes>>4); | ||||
XOR_STORE (in, out, X[0], Y[0]); | ||||
if (numbytes >= 64) | ||||
XOR_STORE (in + 32, out + 32, X[1], Y[1]); | ||||
if (numbytes >= 96) | ||||
XOR_STORE (in + 64, out + 64, X[2], Y[2]); | ||||
if (numbytes >= 128) | ||||
XOR_STORE (in + 96, out + 96, X[3], Y[3]); | ||||
return 0; | if(ctx.keysize == 256) { | |||
Encrypt_Dispatcher(256); | ||||
} else { | ||||
Encrypt_Dispatcher(128); | ||||
} | ||||
} | } | |||
int speck_ctr (unsigned char *out, const unsigned char *in, unsigned long long i | // attention: ctx is provided by value as it is faster in this case, astonishing | |||
nlen, | ly | |||
const unsigned char *n, const speck_context_t ctx) { | static int internal_speck_ctr (unsigned char *out, const unsigned char *in, unsi | |||
gned long long inlen, | ||||
int i; | const unsigned char *n, const speck_context_t ctx | |||
u64 nonce[2]; | ) { | |||
unsigned char block[16]; | ||||
u64 * const block64 = (u64 *)block; | int i; | |||
u64 nonce[2]; | ||||
if (!inlen) | unsigned char block[16]; | |||
return 0; | u64 * const block64 = (u64 *)block; | |||
if(!inlen) | ||||
return 0; | ||||
nonce[0] = ((u64 *)n)[0]; | ||||
nonce[1] = ((u64 *)n)[1]; | ||||
while(inlen >= 128) { | ||||
speck_encrypt_xor(out, in, nonce, ctx, 128); | ||||
in += 128; inlen -= 128; out += 128; | ||||
} | ||||
nonce[0] = ((u64 *)n)[0]; | if(inlen >= 96) { | |||
nonce[1] = ((u64 *)n)[1]; | speck_encrypt_xor(out, in, nonce, ctx, 96); | |||
in += 96; inlen -= 96; out += 96; | ||||
} | ||||
while (inlen >= 128) { | if(inlen >= 64) { | |||
speck_encrypt_xor (out, in, nonce, ctx, 128); | speck_encrypt_xor(out, in, nonce, ctx, 64); | |||
in += 128; inlen -= 128; out += 128; | in += 64; inlen -= 64; out += 64; | |||
} | } | |||
if (inlen >= 96) { | if(inlen >= 32) { | |||
speck_encrypt_xor (out, in, nonce, ctx, 96); | speck_encrypt_xor(out, in, nonce, ctx, 32); | |||
in += 96; inlen -= 96; out += 96; | in += 32; inlen -= 32; out += 32; | |||
} | } | |||
if (inlen >= 64) { | if(inlen >= 16) { | |||
speck_encrypt_xor (out, in, nonce, ctx, 64); | speck_encrypt_xor(block, in, nonce, ctx, 16); | |||
in += 64; inlen -= 64; out += 64; | ((u64 *)out)[0] = block64[0] ^ ((u64 *)in)[0]; | |||
} | ((u64 *)out)[1] = block64[1] ^ ((u64 *)in)[1]; | |||
in += 16; inlen -= 16; out += 16; | ||||
} | ||||
if (inlen >= 32) { | if(inlen > 0) { | |||
speck_encrypt_xor (out, in, nonce, ctx, 32); | speck_encrypt_xor (block, in, nonce, ctx, 16); | |||
in += 32; inlen -= 32; out += 32; | for(i = 0; i < inlen; i++) | |||
} | out[i] = block[i] ^ in[i]; | |||
} | ||||
if (inlen >= 16) { | return 0; | |||
speck_encrypt_xor (block, in, nonce, ctx, 16); | } | |||
((u64 *)out)[0] = block64[0] ^ ((u64 *)in)[0]; | ||||
((u64 *)out)[1] = block64[1] ^ ((u64 *)in)[1]; | ||||
in += 16; inlen -= 16; out += 16; | ||||
} | ||||
if (inlen > 0) { | static int speck_expand_key (speck_context_t *ctx, const unsigned char *k, int k | |||
speck_encrypt_xor (block, in, nonce, ctx, 16); | eysize) { | |||
for (i = 0; i < inlen; i++) | ||||
out[i] = block[i] ^ in[i]; | ||||
} | ||||
return 0; | u64 K[4]; | |||
} | size_t i; | |||
int speck_expand_key (const unsigned char *k, speck_context_t *ctx) { | for(i = 0; i < (keysize >> 6 ); i++) | |||
K[i] = ((u64 *)k)[i]; | ||||
u64 K[4]; | // 128 bit has only two keys A and B thus replacing both C and D with B then | |||
size_t i; | if(keysize == 128) { | |||
for (i = 0; i < numkeywords; i++) | EK(K[0], K[1], K[1], K[1], ctx->rk, ctx->key); | |||
K[i] = ((u64 *)k)[i]; | } else { | |||
EK(K[0], K[1], K[2], K[3], ctx->rk, ctx->key); | ||||
} | ||||
EK (K[0], K[1], K[2], K[3], ctx->rk, ctx->key); | ctx->keysize = keysize; | |||
return 0; | return 0; | |||
} | } | |||
#elif defined (__ARM_NEON) // NEON support --------------------------------- ---------- | #elif defined (__ARM_NEON) && defined (SPECK_ARM_NEON) // NEON support ---- ----------------------------------- | |||
#define LCS(x,r) (((x)<<r)|((x)>>(64-r))) | #define LCS(x,r) (((x)<<r)|((x)>>(64-r))) | |||
#define RCS(x,r) (((x)>>r)|((x)<<(64-r))) | #define RCS(x,r) (((x)>>r)|((x)<<(64-r))) | |||
#define XOR veorq_u64 | #define XOR veorq_u64 | |||
#define AND vandq_u64 | #define AND vandq_u64 | |||
#define ADD vaddq_u64 | #define ADD vaddq_u64 | |||
#define SL vshlq_n_u64 | #define SL vshlq_n_u64 | |||
#define SR vshrq_n_u64 | #define SR vshrq_n_u64 | |||
skipping to change at line 412 | skipping to change at line 670 | |||
#define XOR_STORE(in,out,X,Y) (Y=XOR(Y,SET(((u64 *)(in))[2],((u64 *)(in))[0])), X=XOR(X,SET(((u64 *)(in))[3],((u64 *)(in))[1])), STORE(out,X,Y)) | #define XOR_STORE(in,out,X,Y) (Y=XOR(Y,SET(((u64 *)(in))[2],((u64 *)(in))[0])), X=XOR(X,SET(((u64 *)(in))[3],((u64 *)(in))[1])), STORE(out,X,Y)) | |||
#define ROR(X,r) vsriq_n_u64(SL(X,(64-r)),X,r) | #define ROR(X,r) vsriq_n_u64(SL(X,(64-r)),X,r) | |||
#define ROL(X,r) ROR(X,(64-r)) | #define ROL(X,r) ROR(X,(64-r)) | |||
#define tableR vcreate_u8(0x0007060504030201LL) | #define tableR vcreate_u8(0x0007060504030201LL) | |||
#define tableL vcreate_u8(0x0605040302010007LL) | #define tableL vcreate_u8(0x0605040302010007LL) | |||
#define ROR8(X) SET(vtbl1_u8((uint8x8_t)vget_low_u64(X),tableR), vtbl1_u8((uint8 x8_t)vget_high_u64(X),tableR)) | #define ROR8(X) SET(vtbl1_u8((uint8x8_t)vget_low_u64(X),tableR), vtbl1_u8((uint8 x8_t)vget_high_u64(X),tableR)) | |||
#define ROL8(X) SET(vtbl1_u8((uint8x8_t)vget_low_u64(X),tableL), vtbl1_u8((uint8 x8_t)vget_high_u64(X),tableL)) | #define ROL8(X) SET(vtbl1_u8((uint8x8_t)vget_low_u64(X),tableL), vtbl1_u8((uint8 x8_t)vget_high_u64(X),tableL)) | |||
#define numrounds 34 | ||||
#define numkeywords 4 | ||||
#define R(X,Y,k) (X=XOR(ADD(ROR8(X),Y),k), Y=XOR(ROL(Y,3),X)) | #define R(X,Y,k) (X=XOR(ADD(ROR8(X),Y),k), Y=XOR(ROL(Y,3),X)) | |||
#define Rx2(X,Y,k) (R(X[0],Y[0],k)) | #define Rx2(X,Y,k) (R(X[0],Y[0],k)) | |||
#define Rx4(X,Y,k) (R(X[0],Y[0],k), R(X[1],Y[1],k)) | #define Rx4(X,Y,k) (R(X[0],Y[0],k), R(X[1],Y[1],k)) | |||
#define Rx6(X,Y,k) (R(X[0],Y[0],k), R(X[1],Y[1],k), R(X[2],Y[2],k)) | #define Rx6(X,Y,k) (R(X[0],Y[0],k), R(X[1],Y[1],k), R(X[2],Y[2],k)) | |||
#define Rx8(X,Y,k) (X[0]=ROR8(X[0]), X[0]=ADD(X[0],Y[0]), X[0]=XOR(X[0],k), X[1] =ROR8(X[1]), X[1]=ADD(X[1],Y[1]), X[1]=XOR(X[1],k), \ | #define Rx8(X,Y,k) (X[0]=ROR8(X[0]), X[0]=ADD(X[0],Y[0]), X[0]=XOR(X[0],k), X[1] =ROR8(X[1]), X[1]=ADD(X[1],Y[1]), X[1]=XOR(X[1],k), \ | |||
X[2]=ROR8(X[2]), X[2]=ADD(X[2],Y[2]), X[2]=XOR(X[2],k), X[3]= | X[2]=ROR8(X[2]), X[2]=ADD(X[2],Y[2]), X[2]=XOR(X[2],k), X[3] | |||
ROR8(X[3]), X[3]=ADD(X[3],Y[3]), X[3]=XOR(X[3],k), \ | =ROR8(X[3]), X[3]=ADD(X[3],Y[3]), X[3]=XOR(X[3],k), \ | |||
Z[0]=SL(Y[0],3), Z[1]=SL(Y[1],3), Z[2]=SL(Y[2],3), Z[3]=SL(Y | Z[0]=SL(Y[0],3), Z[1]=SL(Y[1],3), Z[2]=SL(Y[2],3), Z[3]=SL(Y | |||
[3],3), \ | [3],3), \ | |||
Y[0]=SR(Y[0],61), Y[1]=SR(Y[1],61), Y[2]=SR(Y[2],61), Y[3]=S R(Y[3],61), \ | Y[0]=SR(Y[0],61), Y[1]=SR(Y[1],61), Y[2]=SR(Y[2],61), Y[3]=S R(Y[3],61), \ | |||
Y[0]=XOR(Y[0],Z[0]), Y[1]=XOR(Y[1],Z[1]), Y[2]=XOR(Y[2],Z[2] ), Y[3]=XOR(Y[3],Z[3]), \ | Y[0]=XOR(Y[0],Z[0]), Y[1]=XOR(Y[1],Z[1]), Y[2]=XOR(Y[2],Z[2] ), Y[3]=XOR(Y[3],Z[3]), \ | |||
Y[0]=XOR(X[0],Y[0]), Y[1]=XOR(X[1],Y[1]), Y[2]=XOR(X[2],Y[2] ), Y[3]=XOR(X[3],Y[3])) | Y[0]=XOR(X[0],Y[0]), Y[1]=XOR(X[1],Y[1]), Y[2]=XOR(X[2],Y[2] ), Y[3]=XOR(X[3],Y[3])) | |||
#define Rx1(x,y,k) (x[0]=RCS(x[0],8), x[0]+=y[0], x[0]^=k, y[0]=LCS(y[0],3), y[0 ]^=x[0]) | #define Rx1(x,y,k) (x[0]=RCS(x[0],8), x[0]+=y[0], x[0]^=k, y[0]=LCS(y[0],3), y[0 ]^=x[0]) | |||
#define Rx1b(x,y,k) (x=RCS(x,8), x+=y, x^=k, y=LCS(y,3), y^=x) | #define Rx1b(x,y,k) (x=RCS(x,8), x+=y, x^=k, y=LCS(y,3), y^=x) | |||
#define Encrypt(X,Y,k,n) (Rx##n(X,Y,k[0]), Rx##n(X,Y,k[1]), Rx##n(X,Y,k[2]), | #define Encrypt_128(X,Y,k,n) (Rx##n(X,Y,k[0]), Rx##n(X,Y,k[1]), Rx##n(X,Y,k[2] | |||
Rx##n(X,Y,k[3]), Rx##n(X,Y,k[4]), Rx##n(X,Y,k[5]), Rx##n(X,Y,k[6]), Rx##n(X, | ), Rx##n(X,Y,k[3]), Rx##n(X,Y,k[4]), Rx##n(X,Y,k[5]), Rx##n(X,Y,k[6]), Rx## | |||
Y,k[7]), \ | n(X,Y,k[7]), \ | |||
Rx##n(X,Y,k[8]), Rx##n(X,Y,k[9]), Rx##n(X,Y,k[10]), R | Rx##n(X,Y,k[8]), Rx##n(X,Y,k[9]), Rx##n(X,Y,k[10 | |||
x##n(X,Y,k[11]), Rx##n(X,Y,k[12]), Rx##n(X,Y,k[13]), Rx##n(X,Y,k[14]), Rx##n(X,Y | ]), Rx##n(X,Y,k[11]), Rx##n(X,Y,k[12]), Rx##n(X,Y,k[13]), Rx##n(X,Y,k[14]), Rx## | |||
,k[15]), \ | n(X,Y,k[15]), \ | |||
Rx##n(X,Y,k[16]), Rx##n(X,Y,k[17]), Rx##n(X,Y,k[18]), R | Rx##n(X,Y,k[16]), Rx##n(X,Y,k[17]), Rx##n(X,Y,k[18 | |||
x##n(X,Y,k[19]), Rx##n(X,Y,k[20]), Rx##n(X,Y,k[21]), Rx##n(X,Y,k[22]), Rx##n(X,Y | ]), Rx##n(X,Y,k[19]), Rx##n(X,Y,k[20]), Rx##n(X,Y,k[21]), Rx##n(X,Y,k[22]), Rx## | |||
,k[23]), \ | n(X,Y,k[23]), \ | |||
Rx##n(X,Y,k[24]), Rx##n(X,Y,k[25]), Rx##n(X,Y,k[26]), R | Rx##n(X,Y,k[24]), Rx##n(X,Y,k[25]), Rx##n(X,Y,k[26 | |||
x##n(X,Y,k[27]), Rx##n(X,Y,k[28]), Rx##n(X,Y,k[29]), Rx##n(X,Y,k[30]), Rx##n(X,Y | ]), Rx##n(X,Y,k[27]), Rx##n(X,Y,k[28]), Rx##n(X,Y,k[29]), Rx##n(X,Y,k[30]), Rx## | |||
,k[31]), \ | n(X,Y,k[31])) | |||
Rx##n(X,Y,k[32]), Rx##n(X,Y,k[33])) | ||||
#define RK(X,Y,k,key,i) (SET1(k[i],Y), key[i]=Y, X=RCS(X,8), X+=Y, X^=i, Y=LCS(Y | #define Encrypt_256(X,Y,k,n) (Encrypt_128(X,Y,k,n), \ | |||
,3), Y^=X) | Rx##n(X,Y,k[32]), Rx##n(X,Y,k[33])) | |||
#define EK(A,B,C,D,k,key) (RK(B,A,k,key,0), RK(C,A,k,key,1), RK(D,A,k,key,2), | #define RK(X,Y,k,key,i) (SET1(k[i],Y), key[i]=Y, X=RCS(X,8), X+=Y, X^=i, Y=LCS(Y | |||
RK(B,A,k,key,3), RK(C,A,k,key,4), RK(D,A,k,key,5), RK(B,A,k,key,6), \ | ,3), Y^=X) | |||
RK(C,A,k,key,7), RK(D,A,k,key,8), RK(B,A,k,key,9), | ||||
RK(C,A,k,key,10), RK(D,A,k,key,11), RK(B,A,k,key,12), RK(C,A,k,key,13), \ | ||||
RK(D,A,k,key,14), RK(B,A,k,key,15), RK(C,A,k,key,16), | ||||
RK(D,A,k,key,17), RK(B,A,k,key,18), RK(C,A,k,key,19), RK(D,A,k,key,20), \ | ||||
RK(B,A,k,key,21), RK(C,A,k,key,22), RK(D,A,k,key,23), | ||||
RK(B,A,k,key,24), RK(C,A,k,key,25), RK(D,A,k,key,26), RK(B,A,k,key,27), \ | ||||
RK(C,A,k,key,28), RK(D,A,k,key,29), RK(B,A,k,key,30), | ||||
RK(C,A,k,key,31), RK(D,A,k,key,32), RK(B,A,k,key,33)) | ||||
static int speck_encrypt_xor (unsigned char *out, const unsigned char *in, u64 n | #define EK(A,B,C,D,k,key) (RK(B,A,k,key,0), RK(C,A,k,key,1), RK(D,A,k,key,2), | |||
once[], speck_context_t *ctx, int numbytes) { | RK(B,A,k,key,3), RK(C,A,k,key,4), RK(D,A,k,key,5), RK(B,A,k,key,6), \ | |||
RK(C,A,k,key,7), RK(D,A,k,key,8), RK(B,A,k,key,9), | ||||
RK(C,A,k,key,10), RK(D,A,k,key,11), RK(B,A,k,key,12), RK(C,A,k,key,13), \ | ||||
RK(D,A,k,key,14), RK(B,A,k,key,15), RK(C,A,k,key,16), | ||||
RK(D,A,k,key,17), RK(B,A,k,key,18), RK(C,A,k,key,19), RK(D,A,k,key,20), \ | ||||
RK(B,A,k,key,21), RK(C,A,k,key,22), RK(D,A,k,key,23), | ||||
RK(B,A,k,key,24), RK(C,A,k,key,25), RK(D,A,k,key,26), RK(B,A,k,key,27), \ | ||||
RK(C,A,k,key,28), RK(D,A,k,key,29), RK(B,A,k,key,30), | ||||
RK(C,A,k,key,31), RK(D,A,k,key,32), RK(B,A,k,key,33)) | ||||
u64 x[2], y[2]; | #define Encrypt_Dispatcher(keysize) \ | |||
u128 X[4], Y[4], Z[4]; | u64 x[2], y[2]; \ | |||
u128 X[4], Y[4], Z[4]; \ | ||||
\ | ||||
if(numbytes == 16) { \ | ||||
x[0] = nonce[1]; y[0]=nonce[0]; nonce[0]++; \ | ||||
Encrypt_##keysize(x, y, ctx->key, 1); \ | ||||
((u64 *)out)[1] = x[0]; ((u64 *)out)[0] = y[0]; \ | ||||
return 0; \ | ||||
} \ | ||||
\ | ||||
SET1(X[0], nonce[1]); SET2(Y[0], nonce[0]); \ | ||||
\ | ||||
if(numbytes == 32) \ | ||||
Encrypt_##keysize(X, Y, ctx->rk, 2); \ | ||||
else { \ | ||||
X[1] = X[0]; SET2(Y[1], nonce[0]); \ | ||||
if(numbytes == 64) \ | ||||
Encrypt_##keysize(X, Y, ctx->rk, 4); \ | ||||
else { \ | ||||
X[2] = X[0]; SET2(Y[2], nonce[0]); \ | ||||
if(numbytes == 96) \ | ||||
Encrypt_##keysize(X, Y, ctx->rk, 6); \ | ||||
else { \ | ||||
X[3] = X[0]; SET2(Y[3], nonce[0]); \ | ||||
Encrypt_##keysize(X, Y, ctx->rk, 8); \ | ||||
} \ | ||||
} \ | ||||
} \ | ||||
\ | ||||
XOR_STORE(in, out, X[0], Y[0]); \ | ||||
if(numbytes >= 64) \ | ||||
XOR_STORE(in + 32, out + 32, X[1], Y[1]); \ | ||||
if(numbytes >= 96) \ | ||||
XOR_STORE(in + 64, out + 64, X[2], Y[2]); \ | ||||
if(numbytes >= 128) \ | ||||
XOR_STORE(in + 96, out + 96, X[3], Y[3]); \ | ||||
\ | ||||
return 0 | ||||
if (numbytes == 16) { | static int speck_encrypt_xor (unsigned char *out, const unsigned char *in, u64 n | |||
x[0] = nonce[1]; y[0]=nonce[0]; nonce[0]++; | once[], speck_context_t *ctx, int numbytes) { | |||
Encrypt (x, y, ctx->key, 1); | ||||
((u64 *)out)[1] = x[0]; ((u64 *)out)[0] = y[0]; | ||||
return 0; | ||||
} | ||||
SET1 (X[0], nonce[1]); SET2 (Y[0], nonce[0]); | ||||
if (numbytes == 32) | ||||
Encrypt (X, Y, ctx->rk, 2); | ||||
else { | ||||
X[1] = X[0]; SET2 (Y[1], nonce[0]); | ||||
if (numbytes == 64) | ||||
Encrypt (X, Y, ctx->rk, 4); | ||||
else { | ||||
X[2] = X[0]; SET2 (Y[2], nonce[0]); | ||||
if (numbytes == 96) | ||||
Encrypt (X, Y, ctx->rk, 6); | ||||
else { | ||||
X[3] = X[0]; SET2 (Y[3], nonce[0]); | ||||
Encrypt (X, Y, ctx->rk, 8); | ||||
} | ||||
} | ||||
} | ||||
XOR_STORE (in, out, X[0], Y[0]); | ||||
if (numbytes >= 64) | ||||
XOR_STORE (in + 32, out + 32, X[1], Y[1]); | ||||
if (numbytes >= 96) | ||||
XOR_STORE (in + 64, out + 64, X[2], Y[2]); | ||||
if (numbytes >= 128) | ||||
XOR_STORE (in + 96, out + 96, X[3], Y[3]); | ||||
return 0; | if(ctx->keysize == 256) { | |||
Encrypt_Dispatcher(256); | ||||
} else { | ||||
Encrypt_Dispatcher(128); | ||||
} | ||||
} | } | |||
int speck_ctr (unsigned char *out, const unsigned char *in, unsigned long long i | static int internal_speck_ctr (unsigned char *out, const unsigned char *in, unsi | |||
nlen, | gned long long inlen, | |||
const unsigned char *n, speck_context_t *ctx) { | const unsigned char *n, speck_context_t *ctx) { | |||
int i; | ||||
u64 nonce[2]; | ||||
unsigned char block[16]; | ||||
u64 *const block64 = (u64 *)block; | ||||
if (!inlen) | int i; | |||
return 0; | u64 nonce[2]; | |||
unsigned char block[16]; | ||||
u64 *const block64 = (u64 *)block; | ||||
if(!inlen) | ||||
return 0; | ||||
nonce[0] = ((u64 *)n)[0]; | ||||
nonce[1] = ((u64 *)n)[1]; | ||||
while(inlen >= 128) { | ||||
speck_encrypt_xor(out, in, nonce, ctx, 128); | ||||
in += 128; inlen -= 128; out += 128; | ||||
} | ||||
nonce[0] = ((u64 *)n)[0]; | if(inlen >= 96) { | |||
nonce[1] = ((u64 *)n)[1]; | speck_encrypt_xor(out, in, nonce, ctx, 96); | |||
in += 96; inlen -= 96; out += 96; | ||||
} | ||||
while (inlen >= 128) { | if(inlen >= 64) { | |||
speck_encrypt_xor (out, in, nonce, ctx, 128); | speck_encrypt_xor(out, in, nonce, ctx, 64); | |||
in += 128; inlen -= 128; out += 128; | in += 64; inlen -= 64; out += 64; | |||
} | } | |||
if (inlen >= 96) { | if(inlen >= 32) { | |||
speck_encrypt_xor (out, in, nonce, ctx, 96); | speck_encrypt_xor(out, in, nonce, ctx, 32); | |||
in += 96; inlen -= 96; out += 96; | in += 32; inlen -= 32; out += 32; | |||
} | } | |||
if (inlen >= 64) { | if(inlen >= 16) { | |||
speck_encrypt_xor (out, in, nonce, ctx, 64); | speck_encrypt_xor(block, in, nonce, ctx, 16); | |||
in += 64; inlen -= 64; out += 64; | ((u64 *)out)[0] = block64[0] ^ ((u64 *)in)[0]; | |||
} | ((u64 *)out)[1] = block64[1] ^ ((u64 *)in)[1]; | |||
in += 16; inlen -= 16; out += 16; | ||||
} | ||||
if (inlen >= 32) { | if(inlen > 0) { | |||
speck_encrypt_xor (out, in, nonce, ctx, 32); | speck_encrypt_xor(block, in, nonce, ctx, 16); | |||
in += 32; inlen -= 32; out += 32; | for(i = 0; i < inlen; i++) | |||
} | out[i] = block[i] ^ in[i]; | |||
} | ||||
if (inlen >= 16) { | return 0; | |||
speck_encrypt_xor (block, in, nonce, ctx, 16); | } | |||
((u64 *)out)[0] = block64[0] ^ ((u64 *)in)[0]; | ||||
((u64 *)out)[1] = block64[1] ^ ((u64 *)in)[1]; | ||||
in += 16; inlen -= 16; out += 16; | ||||
} | ||||
if (inlen > 0) { | static int speck_expand_key (speck_context_t *ctx, const unsigned char *k, int k | |||
speck_encrypt_xor (block, in, nonce, ctx, 16); | eysize) { | |||
for (i = 0; i < inlen; i++) | ||||
out[i] = block[i] ^ in[i]; | ||||
} | ||||
return 0; | u64 K[4]; | |||
} | size_t i; | |||
int speck_expand_key (const unsigned char *k, speck_context_t *ctx) { | for(i = 0; i < (keysize >> 6); i++) | |||
K[i] = ((u64 *)k)[i]; | ||||
u64 K[4]; | // 128 bit has only two keys A and B thus replacing both C and D with B then | |||
size_t i; | if(keysize == 128) { | |||
for (i = 0; i < numkeywords; i++) | EK(K[0], K[1], K[1], K[1], ctx->rk, ctx->key); | |||
K[i] = ((u64 *)k)[i]; | } else { | |||
EK(K[0], K[1], K[2], K[3], ctx->rk, ctx->key); | ||||
} | ||||
EK (K[0], K[1], K[2], K[3], ctx->rk, ctx->key); | ctx->keysize = keysize; | |||
return 0; | return 0; | |||
} | } | |||
#else // plain C ------------------------------------------------------ ---------- | #else // plain C ----------------------------------------------------- ----------------------------------- | |||
#define ROR(x,r) (((x)>>(r))|((x)<<(64-(r)))) | #define ROR(x,r) (((x)>>(r))|((x)<<(64-(r)))) | |||
#define ROL(x,r) (((x)<<(r))|((x)>>(64-(r)))) | #define ROL(x,r) (((x)<<(r))|((x)>>(64-(r)))) | |||
#define R(x,y,k) (x=ROR(x,8), x+=y, x^=k, y=ROL(y,3), y^=x) | #define R(x,y,k) (x=ROR(x,8), x+=y, x^=k, y=ROL(y,3), y^=x) | |||
static int speck_encrypt (u64 *u, u64 *v, speck_context_t *ctx) { | static int speck_encrypt (u64 *u, u64 *v, speck_context_t *ctx, int numrounds) { | |||
u64 i, x = *u, y = *v; | ||||
for (i = 0; i < 34; i++) | ||||
R (x, y, ctx->key[i]); | ||||
*u = x; *v = y; | ||||
return 0; | u64 i, x = *u, y = *v; | |||
} | ||||
int speck_ctr (unsigned char *out, const unsigned char *in, unsigned long long i | ||||
nlen, | ||||
const unsigned char *n, speck_context_t *ctx) { | ||||
u64 i, nonce[2], x, y, t; | for(i = 0; i < numrounds; i++) | |||
unsigned char *block = malloc (16); | R(x, y, ctx->key[i]); | |||
*u = x; *v = y; | ||||
if (!inlen) { | ||||
free (block); | ||||
return 0; | return 0; | |||
} | ||||
nonce[0] = htole64 ( ((u64*)n)[0] ); | ||||
nonce[1] = htole64 ( ((u64*)n)[1] ); | ||||
t=0; | ||||
while (inlen >= 16) { | ||||
x = nonce[1]; y = nonce[0]; nonce[0]++; | ||||
speck_encrypt (&x, &y, ctx); | ||||
((u64 *)out)[1+t] = htole64 (x ^ ((u64 *)in)[1+t]); | ||||
((u64 *)out)[0+t] = htole64 (y ^ ((u64 *)in)[0+t]); | ||||
t += 2; | ||||
inlen -= 16; | ||||
} | ||||
if (inlen > 0) { | ||||
x = nonce[1]; y = nonce[0]; | ||||
speck_encrypt (&x, &y, ctx); | ||||
((u64 *)block)[1] = htole64 (x); ((u64 *)block)[0] = htole64 (y); | ||||
for (i = 0; i < inlen; i++) | ||||
out[i + 8*t] = block[i] ^ in[i + 8*t]; | ||||
} | ||||
free (block); | ||||
return 0; | ||||
} | } | |||
int speck_expand_key (const unsigned char *k, speck_context_t *ctx) { | static int internal_speck_ctr (unsigned char *out, const unsigned char *in, unsi | |||
gned long long inlen, | ||||
const unsigned char *n, speck_context_t *ctx) { | ||||
u64 K[4]; | u64 i, nonce[2], x, y, t; | |||
u64 i; | unsigned char *block = malloc(16); | |||
int numrounds = (ctx->keysize == 256)?34:32; | ||||
if(!inlen) { | ||||
free(block); | ||||
return 0; | ||||
} | ||||
nonce[0] = htole64( ((u64*)n)[0] ); | ||||
nonce[1] = htole64( ((u64*)n)[1] ); | ||||
for (i = 0; i < 4; i++) | t=0; | |||
K[i] = htole64 ( ((u64 *)k)[i] ); | while(inlen >= 16) { | |||
x = nonce[1]; y = nonce[0]; nonce[0]++; | ||||
speck_encrypt(&x, &y, ctx, numrounds); | ||||
((u64 *)out)[1+t] = htole64(x ^ ((u64 *)in)[1+t]); | ||||
((u64 *)out)[0+t] = htole64(y ^ ((u64 *)in)[0+t]); | ||||
t += 2; | ||||
inlen -= 16; | ||||
} | ||||
for (i = 0; i < 33; i += 3) { | if(inlen > 0) { | |||
ctx->key[i ] = K[0]; | x = nonce[1]; y = nonce[0]; | |||
R (K[1], K[0], i ); | speck_encrypt(&x, &y, ctx, numrounds); | |||
ctx->key[i+1] = K[0]; | ((u64 *)block)[1] = htole64(x); ((u64 *)block)[0] = htole64(y); | |||
R (K[2], K[0], i + 1); | for(i = 0; i < inlen; i++) | |||
ctx->key[i+2] = K[0]; | out[i + 8*t] = block[i] ^ in[i + 8*t]; | |||
R (K[3], K[0], i + 2); | } | |||
} | ||||
ctx->key[33] = K[0]; | ||||
return 1; | ||||
} | ||||
#endif // AVX, SSE, NEON, plain C -------------------------------------- ---------- | free(block); | |||
// cipher SPECK -- 128 bit block size -- 128 bit key size -- CTR mode | return 0; | |||
// used for header encryption, thus the prefix 'he_' | } | |||
// for now: just plain C -- AVX, SSE, NEON might follow | ||||
#define ROR64(x,r) (((x)>>(r))|((x)<<(64-(r)))) | static int speck_expand_key (speck_context_t *ctx, const unsigned char *k, int k | |||
#define ROL64(x,r) (((x)<<(r))|((x)>>(64-(r)))) | eysize) { | |||
#define R64(x,y,k) (x=ROR64(x,8), x+=y, x^=k, y=ROL64(y,3), y^=x) | ||||
static int speck_encrypt_he (u64 *u, u64 *v, speck_context_t *ctx) { | u64 K[4]; | |||
u64 i; | ||||
u64 i, x=*u, y=*v; | for(i = 0; i < (keysize >> 6); i++) | |||
K[i] = htole64( ((u64 *)k)[i] ); | ||||
for (i = 0; i < 32; i++) | for(i = 0; i < 33; i += 3) { | |||
R64 (x, y, ctx->key[i]); | ctx->key[i ] = K[0]; | |||
R(K[1], K[0], i ); | ||||
if(keysize == 256) { | ||||
ctx->key[i+1] = K[0]; | ||||
R(K[2], K[0], i + 1); | ||||
ctx->key[i+2] = K[0]; | ||||
R(K[3], K[0], i + 2); | ||||
} else { | ||||
// counter the i += 3 to make the loop go one by one in this case | ||||
// we can afford the unused 31 and 32 | ||||
i -= 2; | ||||
} | ||||
} | ||||
ctx->key[33] = K[0]; | ||||
*u = x; *v = y; | ctx->keysize = keysize; | |||
return 0; | return 1; | |||
} | } | |||
int speck_he (unsigned char *out, const unsigned char *in, unsigned long long in | #endif // AVX, SSE, NEON, plain C ------------------------------------- | |||
len, | ----------------------------------- | |||
const unsigned char *n, speck_context_t *ctx) { | ||||
u64 i, nonce[2], x, y, t; | // this function wraps the call to internal_speck_ctr functions which have slig | |||
unsigned char *block = malloc(16); | htly different | |||
// signature -- ctx by value for SSE with SPECK_CTX_BYVAL defined in speck.h, by | ||||
pointer otherwise | |||
int speck_ctr (unsigned char *out, const unsigned char *in, unsigned long long i | ||||
nlen, | ||||
const unsigned char *n, speck_context_t *ctx) { | ||||
if (!inlen) { | return internal_speck_ctr(out, in, inlen, n, | |||
free (block); | #if defined (SPECK_CTX_BYVAL) | |||
return 0; | *ctx); | |||
} | #else | |||
nonce[0] = htole64 ( ((u64*)n)[0] ); | ctx); | |||
nonce[1] = htole64 ( ((u64*)n)[1] ); | #endif | |||
} | ||||
t = 0; | // create context loaded with round keys ready for use, key size either 128 or 2 | |||
while (inlen >= 16) { | 56 (bits) | |||
x = nonce[1]; y = nonce[0]; nonce[0]++; | int speck_init (speck_context_t **ctx, const unsigned char *k, int keysize) { | |||
speck_encrypt_he (&x, &y, ctx); | ||||
((u64 *)out)[1+t] = htole64 (x ^ ((u64 *)in)[1+t]); | ||||
((u64 *)out)[0+t] = htole64 (y ^ ((u64 *)in)[0+t]); | ||||
t += 2; | ||||
inlen -= 16; | ||||
} | ||||
if (inlen > 0) { | #if defined (SPECK_ALIGNED_CTX) | |||
x = nonce[1]; y = nonce[0]; | *ctx = (speck_context_t*)_mm_malloc(sizeof(speck_context_t), SPECK_ALIGNED_C | |||
speck_encrypt_he (&x, &y, ctx); | TX); | |||
((u64 *)block)[1] = htole64 (x); ((u64 *)block)[0] = htole64 (y); | #else | |||
for (i = 0; i < inlen; i++) | *ctx = (speck_context_t*)calloc(1, sizeof(speck_context_t)); | |||
out[i+8*t] = block[i] ^ in[i+8*t]; | #endif | |||
} | if(!(*ctx)) { | |||
return -1; | ||||
} | ||||
free(block); | return speck_expand_key(*ctx, k, keysize); | |||
return 0; | ||||
} | } | |||
int speck_expand_key_he (const unsigned char *k, speck_context_t *ctx) { | int speck_deinit (speck_context_t *ctx) { | |||
u64 A, B; | ||||
u64 i; | ||||
A = htole64 ( ((u64 *)k)[0] ); | if(ctx) { | |||
B = htole64 ( ((u64 *)k)[1] ); | #if defined (SPECK_ALIGNED_CTX) | |||
_mm_free(ctx); | ||||
#else | ||||
free(ctx); | ||||
#endif | ||||
} | ||||
for (i = 0; i < 32; i++) { | return 0; | |||
ctx->key[i] = A; | ||||
R64 ( B, A, i); | ||||
} | ||||
return 1; | ||||
} | } | |||
// ----------------------------------------------------------------------------- ----------- | // ----------------------------------------------------------------------------- ----------------------------------- | |||
// cipher SPECK -- 96 bit block size -- 96 bit key size -- ECB mode | // cipher SPECK -- 128 bit block size -- 128 bit key size -- ECB mode (decrypt only) | |||
// follows endianness rules as used in official implementation guide and NOT as in original 2013 cipher presentation | // follows endianness rules as used in official implementation guide and NOT as in original 2013 cipher presentation | |||
// used for IV in header encryption, thus the prefix 'he_iv_' | // used for IV in header encryption (one block) and challenge encryption (user/password) | |||
// for now: just plain C -- probably no need for AVX, SSE, NEON | // for now: just plain C -- probably no need for AVX, SSE, NEON | |||
// prerequisite: lower 16 bit reset | #define ROTL64(x,r) (((x)<<(r))|((x)>>(64-(r)))) | |||
#define ROTL48(x,r) (((((x)<<(r)) | (x>>(48-(r)))) >> 16) << 16) | #define ROTR64(x,r) (((x)>>(r))|((x)<<(64-(r)))) | |||
#define ROTR48(x,r) (((((x)>>(r)) | ((x)<<(48-(r)))) >> 16) << 16) | #define DR128(x,y,k) (y^=x, y=ROTR64(y,3), x^=k, x-=y, x=ROTL64(x,8)) | |||
#define ER96(x,y,k) (x=ROTR48(x,8), x+=y, x^=k, y=ROTL48(y,3), y^=x) | #define ER128(x,y,k) (x=(ROTR64(x,8)+y)^k, y=ROTL64(y,3)^x) | |||
#define DR96(x,y,k) (y^=x, y=ROTR48(y,3), x^=k, x-=y, x=ROTL48(x,8)) | ||||
int speck_he_iv_encrypt (unsigned char *inout, speck_context_t *ctx) { | ||||
u64 x, y; | int speck_128_decrypt (unsigned char *inout, speck_context_t *ctx) { | |||
int i; | ||||
x = htole64 ( *(u64*)&inout[0] ); x <<= 16; | u64 x, y; | |||
y = htole64 ( *(u64*)&inout[4] ); y >>= 16; y <<= 16; | int i; | |||
for (i = 0; i < 28; i++) | ||||
ER96 (y, x, ctx->key[i]); | ||||
x >>= 16; x |= y << 32; | ||||
y >>= 32; | ||||
((u64*)inout)[0] = le64toh (x); | ||||
((u32*)inout)[2] = le32toh (y); | ||||
return 0; | ||||
} | ||||
int speck_he_iv_decrypt (unsigned char *inout, speck_context_t *ctx) { | ||||
u64 x, y; | ||||
int i; | ||||
x = htole64 ( *(u64*)&inout[0] ); x <<= 16; | ||||
y = htole64 ( *(u64*)&inout[4] ); y >>= 16; y <<= 16; | ||||
for (i = 27; i >= 0; i--) | ||||
DR96 (y, x, ctx->key[i]); | ||||
x >>= 16; x |= y << 32; | ||||
y >>= 32; | ||||
((u64*)inout)[0] = le64toh (x); | ||||
((u32*)inout)[2] = le32toh (y); | ||||
return 0; | ||||
} | ||||
int speck_expand_key_he_iv (const unsigned char *k, speck_context_t *ctx) { | x = le64toh( *(u64*)&inout[8] ); | |||
y = le64toh( *(u64*)&inout[0] ); | ||||
u64 A, B; | for(i = 31; i >= 0; i--) | |||
int i; | DR128(x, y, ctx->key[i]); | |||
A = htole64 ( *(u64 *)&k[0] ); A <<= 16; | ((u64*)inout)[1] = htole64(x); | |||
B = htole64 ( *(u64 *)&k[4] ); B >>= 16; B <<= 16; | ((u64*)inout)[0] = htole64(y); | |||
for (i = 0; i < 28; i++) { | return 0; | |||
ctx->key[i] = A; | ||||
ER96 ( B, A, i << 16); | ||||
} | ||||
return 1; | ||||
} | } | |||
// ----------------------------------------------------------------------------- | int speck_128_encrypt (unsigned char *inout, speck_context_t *ctx) { | |||
----------- | ||||
/* | ||||
// code for testing -- to be removed when finished | ||||
#include <stdio.h> // for testing | ||||
#include <string.h> | ||||
int speck_test () { | ||||
uint8_t key[32] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | ||||
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, | ||||
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, | ||||
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F }; | ||||
uint8_t k96[12] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, | ||||
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D }; | ||||
uint8_t iv[16] = { 0x70, 0x6f, 0x6f, 0x6e, 0x65, 0x72, 0x2e, 0x20, | ||||
0x49, 0x6e, 0x20, 0x74, 0x68, 0x6f, 0x73, 0x65 }; | ||||
uint8_t xv[16] = { 0x20, 0x6d, 0x61, 0x64, 0x65, 0x20, 0x69, 0x74, | ||||
0x20, 0x65, 0x71, 0x75, 0x69, 0x76, 0x61, 0x6c }; | ||||
uint8_t p96[12] = { 0x20, 0x75, 0x73, 0x61, 0x67, 0x65, | ||||
0x2C, 0x20, 0x68, 0x6F, 0x77, 0x65 }; | ||||
uint8_t pt[16] = { 0x00 }; | ||||
// expected outcome (according to pp. 35 & 36 of Implementation Guide 1.1 as o | ||||
f 2019) and | ||||
// original cipher presentation as of 2013 in which notably a different endian | ||||
ess is used | ||||
uint8_t ct[16] = { 0x43, 0x8f, 0x18, 0x9c, 0x8d, 0xb4, 0xee, 0x4e, | ||||
0x3e, 0xf5, 0xc0, 0x05, 0x04, 0x01, 0x09, 0x41 }; | ||||
uint8_t xt[16] = { 0x18, 0x0d, 0x57, 0x5c, 0xdf, 0xfe, 0x60, 0x78, | ||||
0x65, 0x32, 0x78, 0x79, 0x51, 0x98, 0x5d, 0xa6 }; | ||||
uint8_t x96[12] = { 0xAA, 0x79, 0x8F, 0xDE, 0xBD, 0x62, | ||||
0x78, 0x71, 0xAB, 0x09, 0x4D, 0x9E }; | ||||
speck_context_t ctx; | ||||
speck_expand_key (key, &ctx); | u64 x, y; | |||
#if defined (SPECK_CTX_BYVAL) | int i; | |||
speck_ctr (pt, pt, 16, iv, ctx); | ||||
#else | ||||
speck_ctr (pt, pt, 16, iv, &ctx); | ||||
#endif | ||||
u64 i; | ||||
fprintf (stderr, "rk00: %016llx\n", ctx.key[0]); | ||||
fprintf (stderr, "rk33: %016llx\n", ctx.key[33]); | ||||
fprintf (stderr, "out : %016lx\n", *(uint64_t*)pt); | ||||
fprintf (stderr, "mem : " ); for (i=0; i < 16; i++) fprintf (stderr, "%02x ", | ||||
pt[i]); fprintf (stderr, "\n"); | ||||
int ret = 1; | ||||
for (i=0; i < 16; i++) | ||||
if (pt[i] != ct[i]) ret = 0; | ||||
memset (pt, 0, 16); | ||||
speck_expand_key_he (key, &ctx); | ||||
speck_he (pt, pt, 16, xv, &ctx); | ||||
fprintf (stderr, "rk00: %016llx\n", ctx.key[0]); | ||||
fprintf (stderr, "rk31: %016llx\n", ctx.key[31]); | ||||
fprintf (stderr, "out : %016lx\n", *(uint64_t*)pt); | ||||
fprintf (stderr, "mem : " ); for (i=0; i < 16; i++) fprintf (stderr, "%02x ", | ||||
pt[i]); fprintf (stderr, "\n"); | ||||
for (i=0; i < 16; i++) | x = le64toh( *(u64*)&inout[8] ); | |||
if (pt[i] != xt[i]) ret = 0; | y = le64toh( *(u64*)&inout[0] ); | |||
speck_expand_key_he_iv (k96, &ctx); | for(i = 0; i < 32; i++) | |||
speck_he_iv_encrypt (p96, &ctx); | ER128(x, y, ctx->key[i]); | |||
// speck_he_iv_decrypt (p96, &ctx); | ||||
// speck_he_iv_encrypt (p96, &ctx); | ||||
fprintf (stderr, "rk00: %016llx\n", ctx.key[0]); | ((u64*)inout)[1] = htole64(x); | |||
fprintf (stderr, "rk27: %016llx\n", ctx.key[27]); | ((u64*)inout)[0] = htole64(y); | |||
fprintf (stderr, "out : %016lx\n", *(uint64_t*)p96); | ||||
fprintf (stderr, "mem : " ); for (i=0; i < 12; i++) fprintf (stderr, "%02x ", | ||||
p96[i]); fprintf (stderr, "\n"); | ||||
for (i=0; i < 12; i++) | return 0; | |||
if (p96[i] != x96[i]) ret = 0; | ||||
return (ret); | ||||
} | ||||
int main (int argc, char* argv[]) { | ||||
fprintf (stdout, "SPECK SELF TEST RESULT: %u\n", speck_test (0,NULL)); | ||||
} | } | |||
*/ | ||||
End of changes. 123 change blocks. | ||||
664 lines changed or deleted | 953 lines changed or added |