"Fossies" - the Fresh Open Source Software Archive 
Member "cryptsetup-2.4.3/lib/crypto_backend/argon2/opt.c" (13 Jan 2022, 10181 Bytes) of package /linux/misc/cryptsetup-2.4.3.tar.xz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
For more information about "opt.c" see the
Fossies "Dox" file reference documentation.
/*
 * Argon2 reference source code package - reference C implementations
 *
 * Copyright 2015
 * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
 *
 * You may use this work under the terms of a Creative Commons CC0 1.0
 * License/Waiver or the Apache Public License 2.0, at your option. The terms of
 * these licenses can be found at:
 *
 * - CC0 1.0 Universal : https://creativecommons.org/publicdomain/zero/1.0
 * - Apache 2.0        : https://www.apache.org/licenses/LICENSE-2.0
 *
 * You should have received a copy of both of these licenses along with this
 * software. If not, they may be obtained at the above URLs.
 */

#include <stdint.h>
#include <string.h>
#include <stdlib.h>

#include "argon2.h"
#include "core.h"

#include "blake2/blake2.h"
#include "blake2/blamka-round-opt.h"

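/*
 * Three SIMD variants of fill_block() follow; the preprocessor selects the
 * widest instruction set enabled at compile time (AVX-512F, AVX2, or the
 * 128-bit SSE fallback). All three compute the same compression function.
 */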
/*
 * Function fills a new memory block and optionally XORs the old block over the new one.
 * Memory must be initialized.
 * @param state Pointer to the just produced block. Content will be updated(!)
 * @param ref_block Pointer to the reference block
 * @param next_block Pointer to the block to be XORed over. May coincide with @ref_block
 * @param with_xor Whether to XOR into the new block (1) or just overwrite (0)
 * @pre all block pointers must be valid
 */
#if defined(__AVX512F__)
static void fill_block(__m512i *state, const block *ref_block,
                       block *next_block, int with_xor) {
    __m512i block_XY[ARGON2_512BIT_WORDS_IN_BLOCK];
    unsigned int i;

    if (with_xor) {
        for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
            state[i] = _mm512_xor_si512(
                state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i));
            block_XY[i] = _mm512_xor_si512(
                state[i], _mm512_loadu_si512((const __m512i *)next_block->v + i));
        }
    } else {
        for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
            block_XY[i] = state[i] = _mm512_xor_si512(
                state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i));
        }
    }

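    /* Two BLAKE2 rounds over groups of eight consecutive 512-bit words,
     * then two rounds over words interleaved with stride 2 */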
    for (i = 0; i < 2; ++i) {
        BLAKE2_ROUND_1(
            state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3],
            state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]);
    }

    for (i = 0; i < 2; ++i) {
        BLAKE2_ROUND_2(
            state[2 * 0 + i], state[2 * 1 + i], state[2 * 2 + i], state[2 * 3 + i],
            state[2 * 4 + i], state[2 * 5 + i], state[2 * 6 + i], state[2 * 7 + i]);
    }

    for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
        state[i] = _mm512_xor_si512(state[i], block_XY[i]);
        _mm512_storeu_si512((__m512i *)next_block->v + i, state[i]);
    }
}
#elif defined(__AVX2__)
static void fill_block(__m256i *state, const block *ref_block,
                       block *next_block, int with_xor) {
    __m256i block_XY[ARGON2_HWORDS_IN_BLOCK];
    unsigned int i;

    if (with_xor) {
        for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
            state[i] = _mm256_xor_si256(
                state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i));
            block_XY[i] = _mm256_xor_si256(
                state[i], _mm256_loadu_si256((const __m256i *)next_block->v + i));
        }
    } else {
        for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
            block_XY[i] = state[i] = _mm256_xor_si256(
                state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i));
        }
    }

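    /* Four BLAKE2 rounds over interleaved groups of eight consecutive
     * 256-bit words, then four rounds over words taken with stride 4 */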
    for (i = 0; i < 4; ++i) {
        BLAKE2_ROUND_1(state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5],
                       state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]);
    }

    for (i = 0; i < 4; ++i) {
        BLAKE2_ROUND_2(state[ 0 + i], state[ 4 + i], state[ 8 + i], state[12 + i],
                       state[16 + i], state[20 + i], state[24 + i], state[28 + i]);
    }

    for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
        state[i] = _mm256_xor_si256(state[i], block_XY[i]);
        _mm256_storeu_si256((__m256i *)next_block->v + i, state[i]);
    }
}
#else
static void fill_block(__m128i *state, const block *ref_block,
                       block *next_block, int with_xor) {
    __m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
    unsigned int i;

    if (with_xor) {
        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
            state[i] = _mm_xor_si128(
                state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i));
            block_XY[i] = _mm_xor_si128(
                state[i], _mm_loadu_si128((const __m128i *)next_block->v + i));
        }
    } else {
        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
            block_XY[i] = state[i] = _mm_xor_si128(
                state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i));
        }
    }

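    /* Eight BLAKE2 rounds over groups of eight consecutive 128-bit words,
     * then eight rounds over words taken with stride 8 */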
    for (i = 0; i < 8; ++i) {
        BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
                     state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
                     state[8 * i + 6], state[8 * i + 7]);
    }

    for (i = 0; i < 8; ++i) {
        BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
                     state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
                     state[8 * 6 + i], state[8 * 7 + i]);
    }

    for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
        state[i] = _mm_xor_si128(state[i], block_XY[i]);
        _mm_storeu_si128((__m128i *)next_block->v + i, state[i]);
    }
}
#endif

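/*
 * Computes the next block of pseudo-random reference addresses for
 * data-independent addressing: increments the counter in input_block->v[6],
 * then applies the compression function G twice with an all-zero state, so
 * that address_block = G(0, G(0, input_block)).
 */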
static void next_addresses(block *address_block, block *input_block) {
    /* Temporary zero-initialized blocks */
#if defined(__AVX512F__)
    __m512i zero_block[ARGON2_512BIT_WORDS_IN_BLOCK];
    __m512i zero2_block[ARGON2_512BIT_WORDS_IN_BLOCK];
#elif defined(__AVX2__)
    __m256i zero_block[ARGON2_HWORDS_IN_BLOCK];
    __m256i zero2_block[ARGON2_HWORDS_IN_BLOCK];
#else
    __m128i zero_block[ARGON2_OWORDS_IN_BLOCK];
    __m128i zero2_block[ARGON2_OWORDS_IN_BLOCK];
#endif

    memset(zero_block, 0, sizeof(zero_block));
    memset(zero2_block, 0, sizeof(zero2_block));

    /* Increasing index counter */
    input_block->v[6]++;

    /* First iteration of G */
    fill_block(zero_block, input_block, address_block, 0);

    /* Second iteration of G */
    fill_block(zero2_block, address_block, address_block, 0);
}

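/*
 * Fills one segment (1/ARGON2_SYNC_POINTS of a lane) of instance memory for
 * the given position. For every block of the segment a reference block is
 * selected, either data-independently from address blocks (Argon2i, and the
 * first half of the first pass of Argon2id) or data-dependently from the
 * first word of the previous block, and the new block is computed with
 * fill_block(). Invoked once per (pass, slice, lane); see
 * fill_memory_blocks() in core.c.
 */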
void fill_segment(const argon2_instance_t *instance,
                  argon2_position_t position) {
    block *ref_block = NULL, *curr_block = NULL;
    block address_block, input_block;
    uint64_t pseudo_rand, ref_index, ref_lane;
    uint32_t prev_offset, curr_offset;
    uint32_t starting_index, i;
#if defined(__AVX512F__)
    __m512i state[ARGON2_512BIT_WORDS_IN_BLOCK];
#elif defined(__AVX2__)
    __m256i state[ARGON2_HWORDS_IN_BLOCK];
#else
    __m128i state[ARGON2_OWORDS_IN_BLOCK];
#endif
    int data_independent_addressing;

    if (instance == NULL) {
        return;
    }

    data_independent_addressing =
        (instance->type == Argon2_i) ||
        (instance->type == Argon2_id && (position.pass == 0) &&
         (position.slice < ARGON2_SYNC_POINTS / 2));

    if (data_independent_addressing) {
        init_block_value(&input_block, 0);

        input_block.v[0] = position.pass;
        input_block.v[1] = position.lane;
        input_block.v[2] = position.slice;
        input_block.v[3] = instance->memory_blocks;
        input_block.v[4] = instance->passes;
        input_block.v[5] = instance->type;
    }

    starting_index = 0;

    if ((0 == position.pass) && (0 == position.slice)) {
        starting_index = 2; /* we have already generated the first two blocks */

        /* Don't forget to generate the first block of addresses: */
        if (data_independent_addressing) {
            next_addresses(&address_block, &input_block);
        }
    }

    /* Offset of the current block */
    curr_offset = position.lane * instance->lane_length +
                  position.slice * instance->segment_length + starting_index;

    if (0 == curr_offset % instance->lane_length) {
        /* First block in this lane: the previous block wraps to the lane's last block */
        prev_offset = curr_offset + instance->lane_length - 1;
    } else {
        /* Previous block */
        prev_offset = curr_offset - 1;
    }

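    /* Load the previous block into the SIMD state registers */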
    memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE);

    for (i = starting_index; i < instance->segment_length;
         ++i, ++curr_offset, ++prev_offset) {
        /* 1.1 Rotating prev_offset if needed */
        if (curr_offset % instance->lane_length == 1) {
            prev_offset = curr_offset - 1;
        }

        /* 1.2 Computing the index of the reference block */
        /* 1.2.1 Taking pseudo-random value from the previous block */
        if (data_independent_addressing) {
            if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
                next_addresses(&address_block, &input_block);
            }
            pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
        } else {
            pseudo_rand = instance->memory[prev_offset].v[0];
        }

        /* 1.2.2 Computing the lane of the reference block */
        ref_lane = ((pseudo_rand >> 32)) % instance->lanes;

        if ((position.pass == 0) && (position.slice == 0)) {
            /* Can not reference other lanes yet */
            ref_lane = position.lane;
        }

        /* 1.2.3 Computing the number of possible reference blocks within the
         * lane.
         */
        position.index = i;
        ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
                                ref_lane == position.lane);

        /* 2 Creating a new block */
        ref_block =
            instance->memory + instance->lane_length * ref_lane + ref_index;
        curr_block = instance->memory + curr_offset;
        if (ARGON2_VERSION_10 == instance->version) {
            /* version 1.2.1 and earlier: overwrite, not XOR */
            fill_block(state, ref_block, curr_block, 0);
        } else {
            if (0 == position.pass) {
                fill_block(state, ref_block, curr_block, 0);
            } else {
                fill_block(state, ref_block, curr_block, 1);
            }
        }
    }
}