"Fossies" - the Fresh Open Source Software Archive 
As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option.
Alternatively, you can view or download the uninterpreted source code file here.
For more information about "SerpentFast.c" see the
Fossies "Dox" file reference documentation.
1 /*
2 * Serpent
3 * (C) 1999-2007 Jack Lloyd
4 *
5 * Botan is released under the Simplified BSD License (see license.txt)
6 */
7
8 #include "SerpentFast.h"
9 #include "SerpentFast_sbox.h"
10 #include "Common/Endian.h"
11 #if !defined(_UEFI)
12 #include <memory.h>
13 #include <stdlib.h>
14 #endif
15 #include "cpu.h"
16 #include "misc.h"
17
/*
 * Host byte-order helpers.
 *
 * BOTAN_ENDIAN_N2L / BOTAN_ENDIAN_L2N convert a 32-bit word between native
 * and little-endian order; BOTAN_ENDIAN_N2B / BOTAN_ENDIAN_B2N do the same
 * for big-endian order. When the host already uses the requested order the
 * macro is the identity, otherwise it expands to bswap_32().
 *
 * Inside #if an identifier that is not a defined macro evaluates to 0, so a
 * bare "BYTE_ORDER == BIG_ENDIAN" is true when neither macro exists and
 * would silently select the big-endian variants. Each comparison is
 * therefore guarded with defined(), the compiler's own __BYTE_ORDER__ is
 * used as a fallback, and an undetermined byte order is a hard error.
 */
#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN)
#define SERPENT_HOST_BIG_ENDIAN 1
#elif defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN)
#define SERPENT_HOST_BIG_ENDIAN 0
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define SERPENT_HOST_BIG_ENDIAN 1
#elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define SERPENT_HOST_BIG_ENDIAN 0
#else
#error "SerpentFast: host byte order is unknown (BYTE_ORDER is neither BIG_ENDIAN nor LITTLE_ENDIAN)"
#endif

#if SERPENT_HOST_BIG_ENDIAN

#define BOTAN_ENDIAN_N2B(x) (x)
#define BOTAN_ENDIAN_B2N(x) (x)

#define BOTAN_ENDIAN_N2L(x) bswap_32(x)
#define BOTAN_ENDIAN_L2N(x) bswap_32(x)

#else

#define BOTAN_ENDIAN_N2L(x) (x)
#define BOTAN_ENDIAN_L2N(x) (x)

#define BOTAN_ENDIAN_N2B(x) bswap_32(x)
#define BOTAN_ENDIAN_B2N(x) bswap_32(x)

#endif

/* Only needed to pick the variant above. */
#undef SERPENT_HOST_BIG_ENDIAN
35
#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
/*
 * SIMD block routines, defined outside this file. The callers below advance
 * their in/out pointers by 4 * 16 bytes after every call.
 */
void serpent_simd_encrypt_blocks_4(const unsigned __int8 *in, unsigned __int8 *out, unsigned __int32 *round_key);
void serpent_simd_decrypt_blocks_4(const unsigned __int8 *in, unsigned __int8 *out, unsigned __int32 *round_key);
#endif
40
/*
 * Serpent's Linear Transform
 *
 * Mixes the four 32-bit state words B0..B3 in place using rotl32(), XORs and
 * left shifts. Each argument must be a modifiable lvalue without side
 * effects, because it is evaluated several times.
 *
 * The macro expands to a single do/while(0) statement WITHOUT a trailing
 * semicolon: the caller's own ';' completes it, so an invocation also works
 * as the body of an unbraced if/else.
 */
#define transform(B0,B1,B2,B3) \
	do { \
		B0 = rotl32(B0, 13); B2 = rotl32(B2, 3); \
		B1 ^= B0 ^ B2; B3 ^= B2 ^ (B0 << 3); \
		B1 = rotl32(B1, 1); B3 = rotl32(B3, 7); \
		B0 ^= B1 ^ B3; B2 ^= B3 ^ (B1 << 7); \
		B0 = rotl32(B0, 5); B2 = rotl32(B2, 22); \
	} while (0)
52
/*
 * Serpent's Inverse Linear Transform
 *
 * Undoes the forward linear transform on the four 32-bit state words B0..B3
 * in place, applying the same XOR/shift steps in reverse order with rotr32()
 * in place of rotl32(). Each argument must be a modifiable lvalue without
 * side effects, because it is evaluated several times.
 *
 * The macro expands to a single do/while(0) statement WITHOUT a trailing
 * semicolon: the caller's own ';' completes it, so an invocation also works
 * as the body of an unbraced if/else.
 */
#define i_transform(B0,B1,B2,B3) \
	do { \
		B2 = rotr32(B2, 22); B0 = rotr32(B0, 5); \
		B2 ^= B3 ^ (B1 << 7); B0 ^= B1 ^ B3; \
		B3 = rotr32(B3, 7); B1 = rotr32(B1, 1); \
		B3 ^= B2 ^ (B0 << 3); B1 ^= B0 ^ B2; \
		B2 = rotr32(B2, 3); B0 = rotr32(B0, 13); \
	} while (0)
64
65
/*
 * XOR a key block with a data block
 *
 * XORs the four words round_key[4*round .. 4*round+3] into B0..B3. A
 * variable named round_key must be in scope at the point of use.
 *
 * `round` is parenthesised so that an expression argument (e.g. r + 1)
 * selects the intended key block, and the body is wrapped in do/while(0)
 * (no trailing semicolon) so that the four XORs always act as one statement.
 */
#define key_xor(round, B0, B1, B2, B3) \
	do { \
		B0 ^= round_key[4 * (round)    ]; \
		B1 ^= round_key[4 * (round) + 1]; \
		B2 ^= round_key[4 * (round) + 2]; \
		B3 ^= round_key[4 * (round) + 3]; \
	} while (0)
74
75 /*
76 * Serpent Encryption
77 */
78 void serpent_encrypt_blocks(const unsigned __int8* in, unsigned __int8* out, size_t blocks, unsigned __int8 *ks)
79 {
80 unsigned __int32 B0, B1, B2, B3;
81 unsigned __int32* round_key = ((unsigned __int32*) ks) + 8;
82 size_t i;
83 #if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE && (!defined (DEBUG) || !defined (TC_WINDOWS_DRIVER))
84 if(HasSSE2() && (blocks >= 4))
85 {
86 while(blocks >= 4)
87 {
88 serpent_simd_encrypt_blocks_4(in, out, round_key);
89 in += 4 * 16;
90 out += 4 * 16;
91 blocks -= 4;
92 }
93 }
94 #endif
95
96 for(i = 0; i != blocks; ++i)
97 {
98 memcpy (&B0, in + 0, 4);
99 memcpy (&B1, in + 4, 4);
100 memcpy (&B2, in + 8, 4);
101 memcpy (&B3, in + 12, 4);
102 B0 = BOTAN_ENDIAN_N2L (B0);
103 B1 = BOTAN_ENDIAN_N2L (B1);
104 B2 = BOTAN_ENDIAN_N2L (B2);
105 B3 = BOTAN_ENDIAN_N2L (B3);
106
107 key_xor( 0,B0,B1,B2,B3); SBoxE1(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
108 key_xor( 1,B0,B1,B2,B3); SBoxE2(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
109 key_xor( 2,B0,B1,B2,B3); SBoxE3(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
110 key_xor( 3,B0,B1,B2,B3); SBoxE4(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
111 key_xor( 4,B0,B1,B2,B3); SBoxE5(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
112 key_xor( 5,B0,B1,B2,B3); SBoxE6(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
113 key_xor( 6,B0,B1,B2,B3); SBoxE7(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
114 key_xor( 7,B0,B1,B2,B3); SBoxE8(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
115 key_xor( 8,B0,B1,B2,B3); SBoxE1(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
116 key_xor( 9,B0,B1,B2,B3); SBoxE2(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
117 key_xor(10,B0,B1,B2,B3); SBoxE3(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
118 key_xor(11,B0,B1,B2,B3); SBoxE4(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
119 key_xor(12,B0,B1,B2,B3); SBoxE5(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
120 key_xor(13,B0,B1,B2,B3); SBoxE6(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
121 key_xor(14,B0,B1,B2,B3); SBoxE7(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
122 key_xor(15,B0,B1,B2,B3); SBoxE8(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
123 key_xor(16,B0,B1,B2,B3); SBoxE1(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
124 key_xor(17,B0,B1,B2,B3); SBoxE2(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
125 key_xor(18,B0,B1,B2,B3); SBoxE3(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
126 key_xor(19,B0,B1,B2,B3); SBoxE4(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
127 key_xor(20,B0,B1,B2,B3); SBoxE5(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
128 key_xor(21,B0,B1,B2,B3); SBoxE6(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
129 key_xor(22,B0,B1,B2,B3); SBoxE7(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
130 key_xor(23,B0,B1,B2,B3); SBoxE8(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
131 key_xor(24,B0,B1,B2,B3); SBoxE1(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
132 key_xor(25,B0,B1,B2,B3); SBoxE2(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
133 key_xor(26,B0,B1,B2,B3); SBoxE3(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
134 key_xor(27,B0,B1,B2,B3); SBoxE4(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
135 key_xor(28,B0,B1,B2,B3); SBoxE5(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
136 key_xor(29,B0,B1,B2,B3); SBoxE6(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
137 key_xor(30,B0,B1,B2,B3); SBoxE7(unsigned __int32,B0,B1,B2,B3); transform(B0,B1,B2,B3);
138 key_xor(31,B0,B1,B2,B3); SBoxE8(unsigned __int32,B0,B1,B2,B3); key_xor(32,B0,B1,B2,B3);
139
140 B0 = BOTAN_ENDIAN_L2N(B0);
141 B1 = BOTAN_ENDIAN_L2N(B1);
142 B2 = BOTAN_ENDIAN_L2N(B2);
143 B3 = BOTAN_ENDIAN_L2N(B3);
144 memcpy(out + 0, &B0, 4);
145 memcpy(out + 4, &B1, 4);
146 memcpy(out + 8, &B2, 4);
147 memcpy(out + 12, &B3, 4);
148
149 in += 16;
150 out += 16;
151 }
152 }
153
154 /*
155 * Serpent Decryption
156 */
157 void serpent_decrypt_blocks(const unsigned __int8* in, unsigned __int8* out, size_t blocks, unsigned __int8 *ks)
158 {
159 unsigned __int32 B0, B1, B2, B3;
160 unsigned __int32* round_key = ((unsigned __int32*) ks) + 8;
161 size_t i;
162 #if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE && (!defined (DEBUG) || !defined (TC_WINDOWS_DRIVER))
163 if(HasSSE2() && (blocks >= 4))
164 {
165 while(blocks >= 4)
166 {
167 serpent_simd_decrypt_blocks_4(in, out, round_key);
168 in += 4 * 16;
169 out += 4 * 16;
170 blocks -= 4;
171 }
172 }
173 #endif
174
175 for(i = 0; i != blocks; ++i)
176 {
177 memcpy (&B0, in + 0, 4);
178 memcpy (&B1, in + 4, 4);
179 memcpy (&B2, in + 8, 4);
180 memcpy (&B3, in + 12, 4);
181 B0 = BOTAN_ENDIAN_N2L (B0);
182 B1 = BOTAN_ENDIAN_N2L (B1);
183 B2 = BOTAN_ENDIAN_N2L (B2);
184 B3 = BOTAN_ENDIAN_N2L (B3);
185
186 key_xor(32,B0,B1,B2,B3); SBoxD8(unsigned __int32,B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3);
187 i_transform(B0,B1,B2,B3); SBoxD7(unsigned __int32,B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3);
188 i_transform(B0,B1,B2,B3); SBoxD6(unsigned __int32,B0,B1,B2,B3); key_xor(29,B0,B1,B2,B3);
189 i_transform(B0,B1,B2,B3); SBoxD5(unsigned __int32,B0,B1,B2,B3); key_xor(28,B0,B1,B2,B3);
190 i_transform(B0,B1,B2,B3); SBoxD4(unsigned __int32,B0,B1,B2,B3); key_xor(27,B0,B1,B2,B3);
191 i_transform(B0,B1,B2,B3); SBoxD3(unsigned __int32,B0,B1,B2,B3); key_xor(26,B0,B1,B2,B3);
192 i_transform(B0,B1,B2,B3); SBoxD2(unsigned __int32,B0,B1,B2,B3); key_xor(25,B0,B1,B2,B3);
193 i_transform(B0,B1,B2,B3); SBoxD1(unsigned __int32,B0,B1,B2,B3); key_xor(24,B0,B1,B2,B3);
194 i_transform(B0,B1,B2,B3); SBoxD8(unsigned __int32,B0,B1,B2,B3); key_xor(23,B0,B1,B2,B3);
195 i_transform(B0,B1,B2,B3); SBoxD7(unsigned __int32,B0,B1,B2,B3); key_xor(22,B0,B1,B2,B3);
196 i_transform(B0,B1,B2,B3); SBoxD6(unsigned __int32,B0,B1,B2,B3); key_xor(21,B0,B1,B2,B3);
197 i_transform(B0,B1,B2,B3); SBoxD5(unsigned __int32,B0,B1,B2,B3); key_xor(20,B0,B1,B2,B3);
198 i_transform(B0,B1,B2,B3); SBoxD4(unsigned __int32,B0,B1,B2,B3); key_xor(19,B0,B1,B2,B3);
199 i_transform(B0,B1,B2,B3); SBoxD3(unsigned __int32,B0,B1,B2,B3); key_xor(18,B0,B1,B2,B3);
200 i_transform(B0,B1,B2,B3); SBoxD2(unsigned __int32,B0,B1,B2,B3); key_xor(17,B0,B1,B2,B3);
201 i_transform(B0,B1,B2,B3); SBoxD1(unsigned __int32,B0,B1,B2,B3); key_xor(16,B0,B1,B2,B3);
202 i_transform(B0,B1,B2,B3); SBoxD8(unsigned __int32,B0,B1,B2,B3); key_xor(15,B0,B1,B2,B3);
203 i_transform(B0,B1,B2,B3); SBoxD7(unsigned __int32,B0,B1,B2,B3); key_xor(14,B0,B1,B2,B3);
204 i_transform(B0,B1,B2,B3); SBoxD6(unsigned __int32,B0,B1,B2,B3); key_xor(13,B0,B1,B2,B3);
205 i_transform(B0,B1,B2,B3); SBoxD5(unsigned __int32,B0,B1,B2,B3); key_xor(12,B0,B1,B2,B3);
206 i_transform(B0,B1,B2,B3); SBoxD4(unsigned __int32,B0,B1,B2,B3); key_xor(11,B0,B1,B2,B3);
207 i_transform(B0,B1,B2,B3); SBoxD3(unsigned __int32,B0,B1,B2,B3); key_xor(10,B0,B1,B2,B3);
208 i_transform(B0,B1,B2,B3); SBoxD2(unsigned __int32,B0,B1,B2,B3); key_xor( 9,B0,B1,B2,B3);
209 i_transform(B0,B1,B2,B3); SBoxD1(unsigned __int32,B0,B1,B2,B3); key_xor( 8,B0,B1,B2,B3);
210 i_transform(B0,B1,B2,B3); SBoxD8(unsigned __int32,B0,B1,B2,B3); key_xor( 7,B0,B1,B2,B3);
211 i_transform(B0,B1,B2,B3); SBoxD7(unsigned __int32,B0,B1,B2,B3); key_xor( 6,B0,B1,B2,B3);
212 i_transform(B0,B1,B2,B3); SBoxD6(unsigned __int32,B0,B1,B2,B3); key_xor( 5,B0,B1,B2,B3);
213 i_transform(B0,B1,B2,B3); SBoxD5(unsigned __int32,B0,B1,B2,B3); key_xor( 4,B0,B1,B2,B3);
214 i_transform(B0,B1,B2,B3); SBoxD4(unsigned __int32,B0,B1,B2,B3); key_xor( 3,B0,B1,B2,B3);
215 i_transform(B0,B1,B2,B3); SBoxD3(unsigned __int32,B0,B1,B2,B3); key_xor( 2,B0,B1,B2,B3);
216 i_transform(B0,B1,B2,B3); SBoxD2(unsigned __int32,B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3);
217 i_transform(B0,B1,B2,B3); SBoxD1(unsigned __int32,B0,B1,B2,B3); key_xor( 0,B0,B1,B2,B3);
218
219 B0 = BOTAN_ENDIAN_L2N(B0);
220 B1 = BOTAN_ENDIAN_L2N(B1);
221 B2 = BOTAN_ENDIAN_L2N(B2);
222 B3 = BOTAN_ENDIAN_L2N(B3);
223 memcpy(out + 0, &B0, 4);
224 memcpy(out + 4, &B1, 4);
225 memcpy(out + 8, &B2, 4);
226 memcpy(out + 12, &B3, 4);
227
228 in += 16;
229 out += 16;
230 }
231 }
232
/* The round helper macros are only used by the block routines above. */
#undef i_transform
#undef transform
#undef key_xor
236
237 /*
238 * Serpent Key Schedule
239 */
240 void serpent_set_key(const unsigned __int8 userKey[], unsigned __int8 *ks)
241 {
242 const unsigned __int32 PHI = 0x9E3779B9;
243 unsigned __int32* W = (unsigned __int32*) ks;
244 int i;
245 for(i = 0; i != 8; ++i)
246 {
247 memcpy (W + i, userKey + (i*4), 4);
248 W[i] = BOTAN_ENDIAN_N2L(W[i]);
249 }
250
251 for(i = 8; i != 140; ++i)
252 {
253 unsigned __int32 wi = W[i-8] ^ W[i-5] ^ W[i-3] ^ W[i-1] ^ PHI ^ (unsigned __int32)(i-8);
254 W[i] = rotl32(wi, 11);
255 }
256
257 SBoxE4(unsigned __int32,W[ 8],W[ 9],W[ 10],W[ 11]); SBoxE3(unsigned __int32,W[ 12],W[ 13],W[ 14],W[ 15]);
258 SBoxE2(unsigned __int32,W[ 16],W[ 17],W[ 18],W[ 19]); SBoxE1(unsigned __int32,W[ 20],W[ 21],W[ 22],W[ 23]);
259 SBoxE8(unsigned __int32,W[ 24],W[ 25],W[ 26],W[ 27]); SBoxE7(unsigned __int32,W[ 28],W[ 29],W[ 30],W[ 31]);
260 SBoxE6(unsigned __int32,W[ 32],W[ 33],W[ 34],W[ 35]); SBoxE5(unsigned __int32,W[ 36],W[ 37],W[ 38],W[ 39]);
261 SBoxE4(unsigned __int32,W[ 40],W[ 41],W[ 42],W[ 43]); SBoxE3(unsigned __int32,W[ 44],W[ 45],W[ 46],W[ 47]);
262 SBoxE2(unsigned __int32,W[ 48],W[ 49],W[ 50],W[ 51]); SBoxE1(unsigned __int32,W[ 52],W[ 53],W[ 54],W[ 55]);
263 SBoxE8(unsigned __int32,W[ 56],W[ 57],W[ 58],W[ 59]); SBoxE7(unsigned __int32,W[ 60],W[ 61],W[ 62],W[ 63]);
264 SBoxE6(unsigned __int32,W[ 64],W[ 65],W[ 66],W[ 67]); SBoxE5(unsigned __int32,W[ 68],W[ 69],W[ 70],W[ 71]);
265 SBoxE4(unsigned __int32,W[ 72],W[ 73],W[ 74],W[ 75]); SBoxE3(unsigned __int32,W[ 76],W[ 77],W[ 78],W[ 79]);
266 SBoxE2(unsigned __int32,W[ 80],W[ 81],W[ 82],W[ 83]); SBoxE1(unsigned __int32,W[ 84],W[ 85],W[ 86],W[ 87]);
267 SBoxE8(unsigned __int32,W[ 88],W[ 89],W[ 90],W[ 91]); SBoxE7(unsigned __int32,W[ 92],W[ 93],W[ 94],W[ 95]);
268 SBoxE6(unsigned __int32,W[ 96],W[ 97],W[ 98],W[ 99]); SBoxE5(unsigned __int32,W[100],W[101],W[102],W[103]);
269 SBoxE4(unsigned __int32,W[104],W[105],W[106],W[107]); SBoxE3(unsigned __int32,W[108],W[109],W[110],W[111]);
270 SBoxE2(unsigned __int32,W[112],W[113],W[114],W[115]); SBoxE1(unsigned __int32,W[116],W[117],W[118],W[119]);
271 SBoxE8(unsigned __int32,W[120],W[121],W[122],W[123]); SBoxE7(unsigned __int32,W[124],W[125],W[126],W[127]);
272 SBoxE6(unsigned __int32,W[128],W[129],W[130],W[131]); SBoxE5(unsigned __int32,W[132],W[133],W[134],W[135]);
273 SBoxE4(unsigned __int32,W[136],W[137],W[138],W[139]);
274 }