"Fossies" - the Fresh Open Source Software Archive 
Member "passwdqc-2.0.3/pwqfilter.c" (23 Jun 2023, 36817 Bytes) of package /linux/privat/passwdqc-2.0.3.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
For more information about "pwqfilter.c" see the
Fossies "Dox" file reference documentation and the latest
Fossies "Diffs" side-by-side code changes report:
2.0.2_vs_2.0.3.
1 /*
2 * Copyright (c) 2020 by Solar Designer
3 * See LICENSE
4 */
5
6 #ifdef _MSC_VER
7 #define _CRT_NONSTDC_NO_WARNINGS /* we use unlink() */
8 #define _CRT_SECURE_NO_WARNINGS /* we use fopen() */
9 #include <io.h>
10 #else
11 #include <unistd.h> /* for unlink() */
12 #endif
13
14 #include <stdint.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <assert.h>
19
20 #include "md4.h"
21 #include "passwdqc.h"
22 #define PASSWDQC_FILTER_INTERNALS
23 #include "passwdqc_filter.h"
24
25 /* Flags corresponding to command-line options, can use bits 3 to 23 */
26 #define OPT_VERBOSE 0x08
27 #define OPT_COUNT 0x10
28 #define OPT_LINE_NUMBER 0x20
29 #define OPT_INVERT_MATCH 0x40
30 #define OPT_PRE_HASHED 0x80
31
32 #define OPT_HASH_ID_SHIFT 8
33 #define OPT_HASH_MD4 (PASSWDQC_FILTER_HASH_MD4 << OPT_HASH_ID_SHIFT)
34 #define OPT_HASH_NTLM_CP1252 (PASSWDQC_FILTER_HASH_NTLM_CP1252 << OPT_HASH_ID_SHIFT)
35 #define OPT_HASH_ID_MASK (OPT_HASH_MD4 | OPT_HASH_NTLM_CP1252)
36
37 #define OPT_FP_RATE 0x1000
38 #define OPT_FP_RATE_AT_HIGH_LOAD 0x2000
39 #define OPT_TEST_FP_RATE 0x4000
40
41 /* Bitmask of all supported hash types */
42 #define OPT_HASH_ALL (OPT_HASH_MD4 | OPT_HASH_NTLM_CP1252)
43
44 /* Bitmask of options only valid in lookup mode */
45 #define OPT_LOOKUP (OPT_COUNT | OPT_LINE_NUMBER | OPT_INVERT_MATCH)
46
47 /* Bitmask of options only valid in create and insert modes */
48 #define OPT_INSERT (OPT_FP_RATE | OPT_FP_RATE_AT_HIGH_LOAD)
49
50 /*
51 * Cache line alignment is very important here because of the pattern in which
52 * elements of ssencode[] are used. With 64-byte cache lines, we use 444 of
53 * them with proper alignment, but at worst 563 otherwise. With 128-byte cache
54 * lines, we use 260 with proper alignment, 319 with alignment to 64 but not
55 * 128 bytes, and at worst 374 otherwise. (These numbers do not include the
56 * additional uses by variables that we insert into the largest gap.)
57 */
58 #ifdef __GNUC__
59 __attribute__ ((aligned (128)))
60 #endif
61 static union {
62 uint16_t ssencode[0x10000];
63 struct {
64 /*
65 * Skip until the largest gap in ssencode[], which is from 0xf000 to 0xfffe.
66 * We skip an additional 0x30 elements (96 bytes) so that the hot part of the
67 * header (its second 32 bytes) starts at the beginning of a cache line and
68 * further hot fields that we have in here fall into the same cache line.
69 * Moreover, with the current fields this lets us have the first 8 bytes of
70 * ssdecode[] in the same cache line as well, which makes the rest of it fit
71 * into 121 64-byte cache lines (otherwise, with poor luck it'd need 122).
72 * This brings our total cache usage for these globals to (444+1+121)*64 =
73 * 36224 bytes.
74 */
75 uint16_t skip[0xf030];
76 passwdqc_filter_header_t header;
77 uint64_t maxkicks;
78 passwdqc_filter_packed_t *packed;
79 passwdqc_filter_i_t nbuckets;
80 uint32_t options; /* bitmask of OPT_* flags */
81 uint16_t ssdecode[3876];
82 } s;
83 } globals;
84
85 #define ssencode globals.ssencode
86 #define header globals.s.header
87 #define maxkicks globals.s.maxkicks
88 #define packed globals.s.packed
89 #define nbuckets globals.s.nbuckets
90 #define options globals.s.options
91 #define ssdecode globals.s.ssdecode
92
93 /* Store a copy of (updated) header.threshold in the hottest cache line */
94 #define SET_THRESHOLD(x) options = (options & 0xffffff) | ((uint32_t)(x) << 24);
95 #define GET_THRESHOLD (options >> 24)
96
97 /* For inserts only, also store (updated) header.bucket_size */
98 #define SET_BUCKET_SIZE(x) options = (options & ~7U) | (x);
99 #define GET_BUCKET_SIZE (options & 7)
100
101 static void ssinit(void)
102 {
103 unsigned int a, b, c, d, n = 0;
104 for (d = 0; d < 16; d++)
105 for (c = d; c < 16; c++)
106 for (b = c; b < 16; b++)
107 for (a = b; a < 16; a++) {
108 uint16_t ssd = (d << 12) | (c << 8) | (b << 4) | a;
109 assert(ssd == passwdqc_filter_ssdecode(n));
110 assert(n < sizeof(ssdecode) / sizeof(ssdecode[0]));
111 ssdecode[n++] = ssd;
112 ssencode[ssd] = n;
113 }
114 assert(n == sizeof(ssdecode) / sizeof(ssdecode[0]));
115 assert(&ssdecode[n] <= &ssencode[0xffff]);
116 }
117
118 static inline unsigned int unpack(passwdqc_filter_unpacked_t *dst, const passwdqc_filter_packed_t *src)
119 {
120 /* -1 cast to unsigned becomes greater than bucket size */
121 return (unsigned int)passwdqc_filter_unpack(dst, src, ssdecode);
122 }
123
124 static inline int lookup(passwdqc_filter_hash_t *h, passwdqc_filter_f_t fmask)
125 {
126 passwdqc_filter_i_t i = passwdqc_filter_h2i(h, nbuckets);
127 passwdqc_filter_f_t f = passwdqc_filter_h2f(h);
128
129 passwdqc_filter_unpacked_t u;
130 unsigned int n = unpack(&u, &packed[i]);
131 if (unlikely(n > GET_BUCKET_SIZE))
132 return -1;
133
134 unsigned int j;
135 for (j = 0; j < n; j++)
136 if (passwdqc_filter_f_eq(u.slots[j] & fmask, f & fmask, GET_BUCKET_SIZE))
137 return 1;
138
139 /*
140 * We can skip checking the secondary bucket on lookup when the primary one
141 * is below the fill threshold, but only as long as there are no deletes yet.
142 * Whenever a delete brings a bucket from at to below header.threshold, it
143 * must update header.threshold, and then we must use that in here (we do).
144 */
145 if (n < GET_THRESHOLD)
146 return 0;
147
148 n = unpack(&u, &packed[passwdqc_filter_alti(i, f, nbuckets)]);
149 if (unlikely(n > GET_BUCKET_SIZE))
150 return -1;
151
152 for (j = 0; j < n; j++)
153 if (passwdqc_filter_f_eq(u.slots[j] & fmask, f & fmask, GET_BUCKET_SIZE))
154 return 1;
155
156 return 0;
157 }
158
159 /*
160 * Code specialization flags assuming pack() will be inlined (the corresponding
161 * checks would best be omitted if not inlining).
162 */
163 #define PACK_MASK_OLD 1
164 #define PACK_MASK_NEW 2
165 #define PACK_MASK_ALL (PACK_MASK_OLD | PACK_MASK_NEW)
166
167 static force_inline void pack(passwdqc_filter_packed_t *dst, const passwdqc_filter_unpacked_t *src, unsigned int n, int flags)
168 {
169 if (n == 4) { /* 4x 33-bit as 12-bit semi-sort index, 4x 29-bit */
170 /*
171 * Encode 4x 33-bit fingerprints as 12-bit semi-sort index of 4x 4-bit values
172 * corresponding to most significant bits of each fingerprint, followed by 4x
173 * 29-bit values holding the rest of the fingerprint data in original form.
174 */
175 const unsigned int fbits = 33;
176 const passwdqc_filter_f_t fmask = ((passwdqc_filter_f_t)1 << fbits) - 1;
177 passwdqc_filter_f_t a = src->slots[0];
178 passwdqc_filter_f_t b = src->slots[1];
179 passwdqc_filter_f_t c = src->slots[2];
180 passwdqc_filter_f_t d = src->slots[3];
181 if (flags & PACK_MASK_OLD) {
182 a &= fmask; b &= fmask; c &= fmask;
183 if (flags & PACK_MASK_NEW)
184 d &= fmask;
185 }
186 #define SORT(x, y) if (x < y) { passwdqc_filter_f_t z = x; x = y; y = z; }
187 SORT(a, b)
188 SORT(c, d)
189 /*
190 * The check for "b < c" can be skipped and further 3 SORT() steps performed
191 * unconditionally. This check is a controversial optimization for the case of
192 * updating previously sorted lists. Unfortunately, it increases the average
193 * number of comparisons (but not swaps) for random lists.
194 */
195 if (b < c) {
196 SORT(b, d)
197 SORT(a, c)
198 SORT(b, c)
199 }
200 const unsigned int lobits = fbits - 4;
201 uint16_t ssd = (uint16_t)(a >> lobits);
202 ssd |= (b >> (lobits - 4)) & 0x00f0;
203 ssd |= (c >> (lobits - 8)) & 0x0f00;
204 ssd |= (d >> (lobits - 12)) & 0xf000;
205 const passwdqc_filter_f_t lomask = ((passwdqc_filter_f_t)1 << lobits) - 1;
206 a &= lomask;
207 b &= lomask;
208 c &= lomask;
209 d &= lomask;
210 dst->lo = a | (b << lobits) | (c << (2 * lobits));
211 dst->hi = (c >> (64 - 2 * lobits)) | (d << (3 * lobits - 64)) | ((uint64_t)ssencode[ssd] << (64 - 12));
212 return;
213 }
214
215 if (n == 3) { /* 11111, 3x 41-bit */
216 const unsigned int fbits = 41;
217 const passwdqc_filter_f_t fmask = ((passwdqc_filter_f_t)1 << fbits) - 1;
218 passwdqc_filter_f_t a = src->slots[0];
219 passwdqc_filter_f_t b = src->slots[1];
220 passwdqc_filter_f_t c = src->slots[2];
221 if (flags & PACK_MASK_OLD) {
222 a &= fmask; b &= fmask;
223 if (flags & PACK_MASK_NEW)
224 c &= fmask;
225 }
226 /*
227 * Sorting of fewer than 4 entries is unnecessary, but we use it to detect some
228 * kinds of data corruption. It also very slightly improves compressibility of
229 * the resulting filter files.
230 */
231 SORT(b, c)
232 SORT(a, c)
233 SORT(a, b)
234 dst->lo = a | (b << fbits);
235 dst->hi = (b >> (64 - fbits)) | (c << (2 * fbits - 64)) | ((uint64_t)0xf80 << (64 - 12));
236 return;
237 }
238
239 if (n == 2) { /* 111101, 2x 61-bit */
240 const unsigned int fbits = 61;
241 const passwdqc_filter_f_t fmask = ((passwdqc_filter_f_t)1 << fbits) - 1;
242 passwdqc_filter_f_t a = src->slots[0];
243 passwdqc_filter_f_t b = src->slots[1];
244 if (flags & PACK_MASK_OLD) {
245 a &= fmask;
246 if (flags & PACK_MASK_NEW)
247 b &= fmask;
248 }
249 SORT(a, b)
250 #undef SORT
251 dst->lo = a | (b << fbits);
252 dst->hi = (b >> (64 - fbits)) | ((uint64_t)0xf40 << (64 - 12));
253 return;
254 }
255
256 assert(n == 1);
257
258 dst->lo = src->slots[0];
259 dst->hi = 1;
260 }
261
262 static force_inline unsigned int peek(const passwdqc_filter_packed_t *src)
263 {
264 uint64_t hi = src->hi;
265
266 if (hi <= 1)
267 return (unsigned int)hi; /* 0 or 1 */
268
269 unsigned int ssi = hi >> (64 - 12); /* semi-sort index */
270
271 if (ssi <= 3876)
272 return 4;
273
274 return (ssi >> 7) & 3; /* 2 or 3 */
275 }
276
277 static force_inline int kick(passwdqc_filter_unpacked_t *u, passwdqc_filter_i_t i, passwdqc_filter_f_t f, unsigned int size)
278 {
279 uint32_t rnd = i;
280
281 do {
282 /*
283 * Peek at alternate buckets for each of the fingerprints stored in the bucket
284 * that we have to kick an entry from. If one of those buckets isn't full,
285 * plan to kick that fingerprint. Moreover, if a bucket has 2 or more empty
286 * slots, don't look further and kick that fingerprint right away. There are
287 * two aspects here: (1) never missing a non-full bucket that is just one step
288 * away greatly reduces the number of kicks needed to reach high load factors
289 * (approximately from 16x to 6x of capacity for 98% as compared to pure random
290 * walk, and twice quicker in terms of real time on a certain machine), and (2)
291 * favoring buckets with 2+ empty slots tends to slightly lower the FP rate.
292 */
293 passwdqc_filter_i_t ia;
294 passwdqc_filter_f_t fkick, fdiff = 0;
295 unsigned int n, j = size - 1, bestj = 0;
296 do {
297 fkick = u->slots[j];
298 ia = passwdqc_filter_alti(i, fkick, nbuckets);
299 if ((n = peek(&packed[ia])) < size) {
300 bestj = j;
301 if (!j || n < size - 1)
302 goto kick;
303 }
304 fdiff |= f ^ fkick;
305 } while (j--);
306
307 /* If there are no non-full buckets one step away, resort to random walk */
308 if (!bestj) {
309 /*
310 * If our fingerprint to be (re-)inserted is the same as all of those we could
311 * have kicked, then we're at or close to the maximum number of duplicates for
312 * this fingerprint that we can hold. Don't (re-)insert this duplicate so that
313 * we don't waste many further kicks on a likely failure. Note that this isn't
314 * necessarily the fingerprint insert() was called on now. We might have
315 * already inserted the new fingerprint and if so are now deleting an excessive
316 * duplicate of something previously inserted.
317 */
318 if (unlikely(!fdiff)) {
319 header.dupes++;
320 return 1;
321 }
322 /*
323 * Good randomness is crucial for the random walk. This simple formula works
324 * surprisingly well by mostly reusing variables that we maintain anyway.
325 */
326 rnd = (rnd + (uint32_t)fdiff) * (uint32_t)header.kicks;
327 if (likely(size != 2)) { /* hopefully, compile-time */
328 bestj = rnd >> 30;
329 while (bestj >= size) /* only if size == 3 */
330 bestj = (rnd <<= 2) >> 30;
331 } else {
332 bestj = rnd >> 31;
333 }
334 }
335
336 if (likely(bestj)) { /* recompute unless still have */
337 fkick = u->slots[bestj];
338 ia = passwdqc_filter_alti(i, fkick, nbuckets);
339 }
340
341 kick:
342 u->slots[bestj] = f;
343 pack(&packed[i], u, size, 0);
344
345 n = unpack(u, &packed[ia]);
346 if (unlikely(n > size))
347 return -1;
348
349 if (n < size) {
350 u->slots[n++] = fkick;
351 pack(&packed[ia], u, n, PACK_MASK_OLD);
352 header.inserts++;
353 header.kicks++;
354 return 0;
355 }
356
357 f = fkick;
358 i = ia;
359 } while (likely(++header.kicks < maxkicks));
360
361 return -2;
362 }
363
364 static inline int insert(passwdqc_filter_hash_t *h)
365 {
366 passwdqc_filter_i_t i = passwdqc_filter_h2i(h, nbuckets);
367 passwdqc_filter_f_t f = passwdqc_filter_h2f(h);
368
369 /*
370 * Plan to put this entry into the primary bucket if it's below the threshold.
371 * Otherwise see if the secondary bucket is less full and use it if so. This
372 * logic balances between two conflicting goals: letting us skip the secondary
373 * bucket on lookup when primary isn't full (or is below threshold), and
374 * filling different buckets across the entire table evenly. Each of these
375 * goals has two (luckily non-conflicting) sub-goals. The former reduces FP
376 * rate through comparing against fewer stored fingerprints, and speeds up
377 * lookups. The latter helps reach high load factors in fewer kicks and
378 * preserves more of the larger fingerprints by not putting unnecessarily many
379 * entries in one bucket while we could still avoid that, which also reduces
380 * FP rate. In terms of FP rate, different thresholds turn out to be optimal
381 * depending on target load factor: a threshold of 4 is more optimal for the
382 * highest load factors (near the maximum of 98%), lower thresholds like 2 are
383 * more optimal at lower load factors. Our gradual increase of effective
384 * bucket size plays a further role (even more important at low load factors).
385 */
386 unsigned int n = peek(&packed[i]);
387 if (n >= GET_THRESHOLD) {
388 passwdqc_filter_i_t ia = passwdqc_filter_alti(i, f, nbuckets);
389 if (peek(&packed[ia]) < n)
390 i = ia;
391 }
392
393 passwdqc_filter_unpacked_t u;
394 n = unpack(&u, &packed[i]);
395 if (unlikely(n > GET_BUCKET_SIZE))
396 return -1;
397
398 if (n < GET_BUCKET_SIZE) {
399 u.slots[n++] = f;
400 pack(&packed[i], &u, n, PACK_MASK_ALL);
401 header.inserts++;
402 return 0;
403 }
404
405 /*
406 * At this point, we have one unpacked bucket that is at exactly the current
407 * bucket size. We could have chosen either primary or secondary at random,
408 * as the classic cuckoo filter insertion algorithm does, but testing shows
409 * that this is unnecessary and a fixed implementation-specific choice works
410 * just as well.
411 */
412
413 if (likely(n == 4)) { /* specialized code as an optimization */
414 /*
415 * We only kick fingerprints from full buckets, which implies that they're
416 * already masked to the worst extent possible at the current bucket size.
417 * This lets us use optimized non-masking pack() in kick()'s loop, but only as
418 * long as we don't need the masking for the new fingerprint as well. Let's
419 * pre-mask it here to make this so. We already know we'll have to insert it
420 * into a full bucket (kicking another fingerprint from it), so we couldn't
421 * have preserved those bits anyway.
422 */
423 f &= ((passwdqc_filter_f_t)1 << 33) - 1;
424 return kick(&u, i, f, 4);
425 } else if (likely(n == 2)) { /* and no bucket is larger yet */
426 f &= ((passwdqc_filter_f_t)1 << 61) - 1;
427 return kick(&u, i, f, 2);
428 } else { /* n == 3 and no bucket is larger yet */
429 f &= ((passwdqc_filter_f_t)1 << 41) - 1;
430 return kick(&u, i, f, 3);
431 }
432 }
433
434 static const uint8_t fingerprint_sizes_234[] = {61, 41, 33};
435 static const char * const hash_names[] = {"opaque", "MD4", "NTLM CP1252"};
436
437 static void print_status(void)
438 {
439 printf("Capacity %llu, usage %llu (inserts %llu, deletes %llu), load %.2f%%\n"
440 "Hash type %s, buckets of %u at least %u-bit fingerprints, threshold %u\n"
441 "Effective duplicates omitted %llu, kicks %llu (%.2f of capacity)\n",
442 (unsigned long long)header.capacity, (unsigned long long)(header.inserts - header.deletes),
443 (unsigned long long)header.inserts, (unsigned long long)header.deletes,
444 100. * (header.inserts - header.deletes) / header.capacity,
445 header.hash_id < sizeof(hash_names) / sizeof(hash_names[0]) ? hash_names[header.hash_id] : "unsupported",
446 (unsigned int)header.bucket_size, (unsigned int)fingerprint_sizes_234[header.bucket_size - 2],
447 (unsigned int)header.threshold,
448 (unsigned long long)header.dupes, (unsigned long long)header.kicks,
449 1. * header.kicks / header.capacity);
450 }
451
452 static int new_filter(void)
453 {
454 header.capacity = (header.capacity + 3) & ~3ULL;
455 nbuckets = (uint32_t)(header.capacity >> 2);
456 packed = calloc(nbuckets, sizeof(*packed));
457 if (!packed) {
458 perror("pwqfilter: calloc");
459 return -1;
460 }
461
462 memcpy(header.version, PASSWDQC_FILTER_VERSION, sizeof(header.version));
463 if (options & OPT_FP_RATE_AT_HIGH_LOAD)
464 SET_THRESHOLD(header.threshold = 4)
465 else
466 SET_THRESHOLD(header.threshold = 2)
467 SET_BUCKET_SIZE(header.bucket_size = header.threshold)
468 header.hash_id = (options & OPT_HASH_ID_MASK) >> OPT_HASH_ID_SHIFT;
469 header.endianness = PASSWDQC_FILTER_ENDIANNESS;
470
471 return 0;
472 }
473
474 static int read_filter(const char *filename, int print_status_only)
475 {
476 FILE *f = fopen(filename, "rb");
477 if (!f) {
478 perror("pwqfilter: fopen");
479 return -1;
480 }
481
482 int retval = 0;
483 if (fread(&header, sizeof(header), 1, f) != 1)
484 goto fail_fread;
485
486 if (passwdqc_filter_verify_header(&header)) {
487 fprintf(stderr, "pwqfilter: Invalid or unsupported input filter.\n");
488 goto fail;
489 }
490
491 if ((options & OPT_VERBOSE) || print_status_only) {
492 print_status();
493 if (print_status_only)
494 goto out;
495 }
496
497 SET_THRESHOLD(header.threshold)
498 SET_BUCKET_SIZE(header.bucket_size)
499
500 if ((options & OPT_FP_RATE_AT_HIGH_LOAD) && header.threshold < 4)
501 fprintf(stderr, "pwqfilter: Warning: --optimize-fp-rate-at-high-load is too late for this filter.\n");
502
503 nbuckets = (uint32_t)(header.capacity >> 2);
504 if (SIZE_MAX <= 0xffffffffU && nbuckets > SIZE_MAX / sizeof(*packed)) {
505 fprintf(stderr, "pwqfilter: Input filter claims to be too large for this system.\n");
506 goto fail;
507 }
508
509 packed = malloc(nbuckets * sizeof(*packed));
510 if (!packed) {
511 perror("pwqfilter: malloc");
512 goto fail;
513 }
514
515 if (fread(packed, sizeof(*packed), nbuckets, f) != nbuckets) {
516 fail_fread:
517 if (ferror(f))
518 perror("pwqfilter: fread");
519 else
520 fprintf(stderr, "pwqfilter: fread: Unexpected EOF\n");
521 fail:
522 retval = -1;
523 }
524
525 out:
526 fclose(f);
527
528 return retval;
529 }
530
531 static int write_filter(const char *filename)
532 {
533 FILE *f = fopen(filename, "wb");
534 if (!f) {
535 perror("pwqfilter: fopen");
536 return -1;
537 }
538
539 int retval = 0;
540 if (fwrite(&header, sizeof(header), 1, f) != 1 ||
541 fwrite(packed, sizeof(*packed), nbuckets, f) != nbuckets) {
542 perror("pwqfilter: fwrite");
543 retval = -1;
544 }
545
546 if (fclose(f) || retval) {
547 if (!retval)
548 perror("pwqfilter: fclose");
549 retval = -1;
550 if (unlink(filename))
551 perror("pwqfilter: unlink");
552 }
553
554 return retval;
555 }
556
557 #define READ_LINE_MAX 8192
558
559 static inline char *read_line(void)
560 {
561 #ifdef __GNUC__
562 __attribute__ ((aligned (128)))
563 #endif
564 static char buf[READ_LINE_MAX + 2];
565
566 buf[READ_LINE_MAX] = '\n';
567
568 if (unlikely(!fgets(buf, sizeof(buf), stdin))) {
569 if (ferror(stdin))
570 perror("pwqfilter: fgets");
571 return NULL;
572 }
573
574 if (unlikely(buf[READ_LINE_MAX] != '\n')) {
575 int c;
576 do {
577 c = getc(stdin);
578 } while (c != EOF && c != '\n');
579 if (ferror(stdin)) {
580 perror("pwqfilter: getc");
581 return NULL;
582 }
583 }
584
585 return buf;
586 }
587
588 static inline int unhex(passwdqc_filter_hash_t *dst, const char *src)
589 {
590 #ifdef __GNUC__
591 __attribute__ ((aligned (64)))
592 #endif
593 static const uint8_t a2i[] = {
594 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
595 16, 16, 16, 16, 16, 16, 16,
596 10, 11, 12, 13, 14, 15,
597 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
598 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
599 10, 11, 12, 13, 14, 15
600 };
601 unsigned char *dp = dst->uc;
602 const unsigned char *dend = dst->uc + sizeof(dst->uc);
603 const unsigned char *sp = (const unsigned char *)src;
604
605 do {
606 unsigned int c, hi, lo;
607 c = *sp++ - '0';
608 if (c >= sizeof(a2i) || (hi = a2i[c]) > 15)
609 break;
610 c = *sp++ - '0';
611 if (c >= sizeof(a2i) || (lo = a2i[c]) > 15)
612 break;
613 *dp++ = (hi << 4) | lo;
614 } while (likely(dp < dend));
615
616 return likely(dp == dend) ? 0 : -1;
617 }
618
619 static inline int line_to_hash(passwdqc_filter_hash_t *dst, const char *line, unsigned long long lineno)
620 {
621 if (options & OPT_HASH_ALL) {
622 if (unlikely(line[READ_LINE_MAX] != '\n')) {
623 fprintf(stderr, "\rpwqfilter: Line %llu too long.\n", lineno);
624 return -1;
625 }
626 if (options & OPT_HASH_MD4)
627 passwdqc_filter_md4(dst, line);
628 else
629 passwdqc_filter_ntlm_cp1252(dst, line);
630 } else if (unlikely(unhex(dst, line))) {
631 fprintf(stderr, "\rpwqfilter: Not a supported hex-encoded hash on standard input line %llu.\n", lineno);
632 return -1;
633 }
634
635 return 0;
636 }
637
638 static int lookup_loop(void)
639 {
640 char *line;
641 unsigned long long lineno = 0, lookups = 0, positive = 0, negative = 0, errors = 0;
642
643 while ((line = read_line())) {
644 lineno++;
645
646 passwdqc_filter_hash_t h;
647 if (unlikely(line_to_hash(&h, line, lineno))) {
648 errors++;
649 continue;
650 }
651
652 lookups++;
653 int status = lookup(&h, ~(passwdqc_filter_f_t)0);
654 if (unlikely(status < 0))
655 break;
656 if (status) {
657 positive++;
658 if (!(options & (OPT_COUNT | OPT_INVERT_MATCH))) {
659 print:
660 if (options & OPT_LINE_NUMBER)
661 printf("%llu:", lineno);
662 fputs(line, stdout);
663 }
664 } else {
665 negative++;
666 if ((options & (OPT_COUNT | OPT_INVERT_MATCH)) == OPT_INVERT_MATCH)
667 goto print;
668 }
669 }
670
671 if (line)
672 fprintf(stderr, "Data corruption detected, abandoning further search\n");
673 else if (options & OPT_COUNT)
674 printf("%llu\n", (options & OPT_INVERT_MATCH) ? negative : positive);
675 if (options & OPT_VERBOSE)
676 fprintf(stderr, "Lines %llu, lookups %llu, positive %llu, negative %llu, errors %llu\n",
677 lineno, lookups, positive, negative, errors);
678
679 if (line || ferror(stdin))
680 return -1;
681
682 return !!((options & OPT_INVERT_MATCH) ? negative : positive);
683 }
684
685 static void set_bucket_size(void)
686 {
687 uint64_t usage = header.inserts - header.deletes;
688 uint64_t max_kicks_until_size_3 = (header.capacity >> ((options & OPT_FP_RATE) ? 2 : 5)) * 3;
689 unsigned int size = 4;
690 if (usage < header.capacity * 44 / 100 && header.kicks <= max_kicks_until_size_3)
691 size = 2;
692 else if (usage < header.capacity * 71 / 100 && header.kicks <= (max_kicks_until_size_3 << 1))
693 size = 3;
694
695 if (size < GET_THRESHOLD)
696 size = GET_THRESHOLD;
697
698 if (size > GET_BUCKET_SIZE || !header.inserts) {
699 if (size > GET_BUCKET_SIZE)
700 SET_BUCKET_SIZE((header.bucket_size = size))
701 if (options & OPT_VERBOSE) {
702 putc('\r', stderr);
703 printf("Storing at least %u-bit fingerprints since load %.2f%%, kicks %.2f of capacity\n",
704 (unsigned int)fingerprint_sizes_234[GET_BUCKET_SIZE - 2],
705 100. * (header.inserts - header.deletes) / header.capacity,
706 1. * header.kicks / header.capacity);
707 }
708 }
709 }
710
711 static void print_progress(unsigned long long lineno)
712 {
713 fprintf(stderr, "\rLines %.*f%s, load %.2f%%, kicks %.2f of capacity",
714 lineno < 1000000 ? 0 : 3,
715 lineno < 1000000 ? (double)lineno : 1e-6 * lineno,
716 lineno < 1000000 ? "" : "M",
717 100. * (header.inserts - header.deletes) / header.capacity,
718 1. * header.kicks / header.capacity);
719 }
720
721 static int insert_loop(void)
722 {
723 uint64_t inserts_start = header.inserts;
724 uint64_t dupes_start = header.dupes;
725
726 uint64_t checkpoint = 0, previous = 0;
727 uint64_t effort_step = (header.capacity + 199) / 200;
728 uint64_t inserts_step = effort_step;
729 uint64_t inserts_goal = header.capacity / 10;
730 if (inserts_goal < header.inserts)
731 inserts_goal = header.inserts;
732 maxkicks = header.capacity;
733
734 int status = 0;
735 char *line;
736 unsigned long long lineno = 0, errors = 0;
737
738 /*
739 * A threshold of 0 is different for lookup, but we can optimize its handling
740 * for insert.
741 */
742 if (GET_THRESHOLD == 0)
743 SET_THRESHOLD(1)
744
745 while ((line = read_line())) {
746 uint64_t effort = header.inserts + header.kicks;
747 if (unlikely(effort >= checkpoint)) {
748 set_bucket_size();
749 if (!checkpoint || effort - previous >= 1000000) {
750 previous = effort;
751 print_progress(lineno);
752 }
753 checkpoint = effort + effort_step;
754 if (header.inserts >= inserts_goal) {
755 uint64_t usage = header.inserts - header.deletes;
756 if (usage > header.capacity)
757 break;
758 if (usage >= header.capacity * 97 / 100)
759 inserts_step = (header.capacity + 999 - usage) / 1000;
760 else
761 inserts_step = (header.capacity + 199 - usage) / 200;
762 inserts_goal = header.inserts + inserts_step;
763 maxkicks = header.kicks + header.capacity;
764 }
765 }
766
767 lineno++;
768
769 passwdqc_filter_hash_t h;
770 if (unlikely(line_to_hash(&h, line, lineno))) {
771 errors++;
772 continue;
773 }
774
775 if (unlikely((status = insert(&h)) < 0))
776 break;
777 }
778
779 SET_THRESHOLD(header.threshold)
780
781 if (line) {
782 print_progress(lineno);
783 if (status == -2) {
784 /*
785 * We have to abandon the filter here because when we bump into maxkicks we've
786 * kicked out and not re-inserted an entry likely other than the one we were
787 * trying to insert originally. To avoid this, we'd need a separate soft limit
788 * that we'd most likely bump into between insert() calls (not inside a call).
789 */
790 fprintf(stderr, "\nProgress almost stalled, abandoning incomplete filter\n");
791 /*
792 * For filters of medium size (some million entries), we expect to be able to
793 * achieve a little over 98% (e.g., 98.03%) with unbiased non-repeating inputs.
794 * For small filters, there's significant variability of maximum achievable
795 * load (e.g., 97.7% to 98.3%). For filters approaching the maximum supported
796 * capacity of almost 2^34, the biases caused by our use of only 32 bits in
797 * h2i() become significant and in simulation limit the achievable load e.g. to
798 * 97% for a capacity of a little over half the maximum. To be on the safe
799 * side, we only print a likely explanation for below 97% and only for filters
800 * that are way below the maximum capacity.
801 */
802 if (header.capacity <= (1ULL << 32) &&
803 header.inserts - header.deletes < header.capacity * 97 / 100)
804 fprintf(stderr, "Likely too many repeating%s inputs%s\n",
805 (options & OPT_HASH_ALL) ? "" : " or biased",
806 header.capacity < 1000000 ? " or filter is too small" : "");
807 } else { /* -1 return from insert() or usage > capacity */
808 fprintf(stderr, "\nData corruption detected, abandoning incomplete filter\n");
809 }
810 }
811 fprintf(stderr, "\rLines %llu, inserts %llu, excessive effective duplicates %llu, errors %llu\n",
812 lineno, (unsigned long long)(header.inserts - inserts_start), (unsigned long long)(header.dupes - dupes_start), errors);
813
814 return (line || ferror(stdin)) ? -1 : 0;
815 }
816
817 static int test_fp_rate(void)
818 {
819 unsigned int fps = 0, tests = 0, errors = 0;
820
821 if (header.inserts != header.deletes)
822 do {
823 int i, n = tests + (1 << 22); /* signed int for OpenMP 2.5 */
824 #ifdef _OPENMP
825 #pragma omp parallel for default(none) private(i) shared(n, fps, tests, errors)
826 #endif
827 for (i = tests; i < n; i++) {
828 passwdqc_filter_hash_t h;
829 MD4_CTX ctx;
830 MD4_Init(&ctx);
831 ctx.a += i;
832 MD4_Update(&ctx, "notNTLM", 8);
833 MD4_Final(h.uc, &ctx);
834
835 /*
836 * Process the hash table semi-sequentially for some speedup. As long as we
837 * ensure we test all possible values of the first 3 bytes, this does not bias
838 * the final estimate, but the verbose output shown during testing might show
839 * biased numbers until eventually converging to the global average. See also
840 * the comment in passwdqc_filter_h2i().
841 */
842 h.uc[0] = i >> 22;
843 h.uc[1] = i >> 14;
844 h.uc[2] = i >> 6;
845 h.u32[0] = ((h.u32[0] & 0x0f0f0f0f) << 4) | ((h.u32[0] >> 4) & 0x0f0f0f0f);
846
847 switch (lookup(&h, ~(passwdqc_filter_f_t)0xfffff)) {
848 case 0:
849 break;
850 case 1:
851 #ifdef _OPENMP
852 #pragma omp atomic
853 #endif
854 fps++;
855 break;
856 default: /* -1 */
857 #ifdef _OPENMP
858 #pragma omp atomic
859 #endif
860 errors++;
861 }
862 #ifndef _OPENMP
863 if (unlikely(errors))
864 break;
865 #endif
866 }
867 tests = n;
868
869 double progress = 100. * tests / (1 << 30);
870 if (options & OPT_VERBOSE)
871 fprintf(stderr, "\rTests %u (%.2f%%), FPs %u (rate %.3e) for fingerprints cut by 20 bits",
872 tests, progress, fps, (double)fps / tests);
873 else
874 fprintf(stderr, "\rTests %u (%.2f%%)", tests, progress);
875 } while (tests < (1 << 30) && !errors);
876
877 if (tests)
878 putc('\n', stderr);
879 if (errors) {
880 fprintf(stderr, "Data corruption detected, abandoning further testing\n");
881 return -1;
882 }
883 if (fps) {
884 double bperfp = 1e-9 * ((unsigned long long)tests << 20) / fps;
885 printf("Estimated FP rate 1 in %.*f billion\n", (bperfp < 10) + (bperfp < 100) + (bperfp < 1000), bperfp);
886 } else {
887 printf("Estimated FP rate 0 (%s)\n", tests ? "no FPs seen in testing" : "empty filter");
888 }
889
890 return 0;
891 }
892
893 static int opt_eq(const char *ref, const char *opt, const char **arg)
894 {
895 size_t n = strlen(ref);
896 int retval = !strncmp(ref, opt, n) && (!opt[n] || opt[n] == '=');
897 if (retval && opt[n] && opt[n + 1])
898 *arg = &opt[n + 1];
899 return retval;
900 }
901
902 static void print_help(void)
903 {
904 puts("Manage binary passphrase filter files.\n"
905 "\nUsage: pwqfilter [options]\n"
906 "\nValid options are:\n"
907 "Modes\n"
908 " --lookup (default)\n"
909 " lookup plaintexts or hashes against an existing filter;\n"
910 " --status\n"
911 " print usage statistics for an existing filter;\n"
912 " --create=CAPACITY\n"
913 " create a new filter for up to ~98% of CAPACITY entries;\n"
914 " --insert\n"
915 " insert entries into an existing filter;\n"
916 " --test-fp-rate (can be used on its own or along with another mode)\n"
917 " estimate the false positive rate (FP rate) of a filter;\n"
918 "Optimizations (with --create or --insert)\n"
919 " --optimize-fp-rate\n"
920 " better than default FP rate, briefly slower inserts after ~30% and ~60%;\n"
921 " --optimize-fp-rate-at-high-load\n"
922 " better than default FP rate at load ~95% to 98%, a lot worse below ~90%;\n"
923 "Input and output\n"
924 " -f FILE or --filter=FILE\n"
925 " read an existing filter from FILE;\n"
926 " -o FILE or --output=FILE\n"
927 " write a new or modified filter to FILE;\n"
928 " --pre-hashed (default for filters created with this option and no --hash-*)\n"
929 " lookup or insert by hex-encoded hashes, not plaintexts;\n"
930 " --hash-md4 (default for new filters)\n"
931 " hash plaintexts with MD4 prior to lookup or insert;\n"
932 " --hash-ntlm-cp1252\n"
933 " hash assumed CP1252 plaintexts with NTLM prior to lookup or insert;\n"
934 "Lookup output modifiers\n"
935 " -c or --count\n"
936 " print a count of (non-)matching lines instead of the lines themselves;\n"
937 " -n or --line-number\n"
938 " prefix each line with its number in the input stream;\n"
939 " -v or --invert-match\n"
940 " print or count non-matching lines;\n"
941 "General\n"
942 " --verbose\n"
943 " print additional information;\n"
944 " --version\n"
945 " print program version and exit;\n"
946 " -h or --help\n"
947 " print this help text and exit.");
948 }
949
950 int main(int argc, char **argv)
951 {
952 enum {MODE_NONE = 0, MODE_LOOKUP = 1, MODE_STATUS = 2, MODE_CREATE = 3, MODE_INSERT} mode = MODE_NONE;
953 const char *input = NULL, *output = NULL;
954
955 options = 0;
956
957 if (unlikely(argc <= 1)) {
958 fprintf(stderr, "pwqfilter: No action requested, try --help.\n");
959 return 2;
960 }
961
962 while (argc > 1) {
963 const char *opt = argv[1], *arg = NULL;
964 if (opt[0] == '-' && opt[1] != '-' && opt[1] && opt[2]) {
965 static char optbuf[3] = {'-', 0, 0};
966 optbuf[1] = opt[1];
967 opt = optbuf;
968 memmove(&argv[1][1], &argv[1][2], strlen(&argv[1][1]));
969 } else {
970 argc--; argv++;
971 }
972
973 if (!strcmp("-h", opt) || !strcmp("--help", opt)) {
974 print_help();
975 return 0;
976 }
977
978 if (!strcmp("--version", opt)) {
979 printf("pwqfilter version %s\n", PASSWDQC_VERSION);
980 return 0;
981 }
982
983 if (!strcmp("--lookup", opt)) {
984 if (mode || output)
985 goto fail_conflict;
986 mode = MODE_LOOKUP;
987 continue;
988 }
989
990 if (!strcmp("--status", opt)) {
991 if (mode || (options & (OPT_HASH_ALL | OPT_PRE_HASHED)))
992 goto fail_conflict;
993 mode = MODE_STATUS;
994 continue;
995 }
996
997 if (opt_eq("--create", opt, &arg)) {
998 if (mode || input || (options & OPT_LOOKUP))
999 goto fail_conflict;
1000 mode = MODE_CREATE;
1001 if (!arg)
1002 goto fail_no_arg;
1003 char *e;
1004 header.capacity = strtoul(arg, &e, 0);
1005 if (*e || !header.capacity || header.capacity > ((1ULL << 32) - 1) * 4) {
1006 fprintf(stderr, "pwqfilter: Requested capacity is invalid or unsupported.\n");
1007 return 2;
1008 }
1009 continue;
1010 }
1011
1012 if (!strcmp("--insert", opt)) {
1013 if (mode || (options & OPT_LOOKUP))
1014 goto fail_conflict;
1015 mode = MODE_INSERT;
1016 continue;
1017 }
1018
1019 if (!strcmp("--test-fp-rate", opt)) {
1020 options |= OPT_TEST_FP_RATE;
1021 continue;
1022 }
1023
1024 if (!strcmp("--optimize-fp-rate", opt)) {
1025 if (options & OPT_FP_RATE_AT_HIGH_LOAD)
1026 goto fail_conflict;
1027 options |= OPT_FP_RATE;
1028 continue;
1029 }
1030
1031 if (!strcmp("--optimize-fp-rate-at-high-load", opt)) {
1032 if (options & OPT_FP_RATE)
1033 goto fail_conflict;
1034 options |= OPT_FP_RATE_AT_HIGH_LOAD;
1035 continue;
1036 }
1037
1038 if (!strcmp("-f", opt) || opt_eq("--filter", opt, &arg)) {
1039 if (mode == MODE_CREATE || input)
1040 goto fail_conflict;
1041 if (!opt[2]) {
1042 argc--;
1043 arg = *++argv;
1044 }
1045 if (!arg)
1046 goto fail_no_arg;
1047 input = arg;
1048 continue;
1049 }
1050
1051 if (!strcmp("-o", opt) || opt_eq("--output", opt, &arg)) {
1052 if (mode == MODE_LOOKUP || mode == MODE_STATUS || output)
1053 goto fail_conflict;
1054 if (!opt[2]) {
1055 argc--;
1056 arg = *++argv;
1057 }
1058 if (!arg)
1059 goto fail_no_arg;
1060 output = arg;
1061 continue;
1062 }
1063
1064 if (!strcmp("--pre-hashed", opt)) {
1065 if (mode == MODE_STATUS)
1066 goto fail_conflict;
1067 options |= OPT_PRE_HASHED;
1068 continue;
1069 }
1070
1071 if (!strcmp("--hash-md4", opt)) {
1072 if ((options & OPT_HASH_ALL) || mode == MODE_STATUS)
1073 goto fail_conflict;
1074 options |= OPT_HASH_MD4;
1075 continue;
1076 }
1077
1078 if (!strcmp("--hash-ntlm-cp1252", opt)) {
1079 if ((options & OPT_HASH_ALL) || mode == MODE_STATUS)
1080 goto fail_conflict;
1081 options |= OPT_HASH_NTLM_CP1252;
1082 continue;
1083 }
1084
1085 if (!strcmp("-c", opt) || !strcmp("--count", opt)) {
1086 if (mode > MODE_LOOKUP || (options & OPT_LINE_NUMBER))
1087 goto fail_conflict;
1088 options |= OPT_COUNT;
1089 continue;
1090 }
1091
1092 if (!strcmp("-n", opt) || !strcmp("--line-number", opt)) {
1093 if (mode > MODE_LOOKUP || (options & OPT_COUNT))
1094 goto fail_conflict;
1095 options |= OPT_LINE_NUMBER;
1096 continue;
1097 }
1098
1099 if (!strcmp("-v", opt) || !strcmp("--invert-match", opt)) {
1100 if (mode > MODE_LOOKUP)
1101 goto fail_conflict;
1102 options |= OPT_INVERT_MATCH;
1103 continue;
1104 }
1105
1106 if (!strcmp("--verbose", opt)) {
1107 options |= OPT_VERBOSE;
1108 continue;
1109 }
1110
1111 fprintf(stderr, "pwqfilter: Option %s unrecognized.\n", opt);
1112 return 2;
1113
1114 fail_no_arg:
1115 fprintf(stderr, "pwqfilter: Option %s requires an argument.\n", opt);
1116 return 2;
1117
1118 fail_conflict:
1119 fprintf(stderr, "pwqfilter: Option %s conflicts with previously specified options.\n", opt);
1120 return 2;
1121 }
1122
1123 if (!mode) {
1124 if (options & OPT_TEST_FP_RATE) {
1125 if (options & (OPT_HASH_ALL | OPT_PRE_HASHED))
1126 goto fail_unused;
1127 } else {
1128 mode = MODE_LOOKUP; /* default mode */
1129 }
1130 }
1131
1132 if (!input && !(options & (OPT_HASH_ALL | OPT_PRE_HASHED)))
1133 options |= OPT_HASH_MD4; /* default hash type */
1134
1135 if (mode <= MODE_STATUS && output) {
1136 fprintf(stderr, "pwqfilter: No filter modifications requested yet an output filename specified.\n");
1137 return 2;
1138 }
1139
1140 if ((mode != MODE_LOOKUP && (options & OPT_LOOKUP)) ||
1141 (mode < MODE_CREATE && (options & OPT_INSERT)) ||
1142 (mode != MODE_CREATE && (options & OPT_HASH_ALL) && (options & OPT_PRE_HASHED))) {
1143 fail_unused:
1144 fprintf(stderr, "pwqfilter: The requested mode doesn't use other specified options.\n");
1145 return 2;
1146 }
1147
1148 if (mode != MODE_CREATE && !input) {
1149 fprintf(stderr, "pwqfilter: Neither requested to create a new filter nor to use an existing one.\n");
1150 return 2;
1151 }
1152
1153 if (mode > MODE_STATUS && !output)
1154 fprintf(stderr, "pwqfilter: No output filename specified - doing a dry run.\n");
1155
1156 if ((input && read_filter(input, mode == MODE_STATUS)) || (!input && new_filter()))
1157 return 2;
1158
1159 /*
1160 * The uses of (un)likely() here optimize for --create --pre-hashed. Somehow
1161 * omitting them results in very different code (smaller and slower) in inner
1162 * loops at least on a certain RHEL7'ish test system.
1163 */
1164 if (unlikely(mode == MODE_STATUS)) {
1165 if ((options & OPT_TEST_FP_RATE) && test_fp_rate())
1166 return 2;
1167 return 0;
1168 }
1169
1170 if (!likely(options & OPT_PRE_HASHED)) {
1171 if (header.hash_id > PASSWDQC_FILTER_HASH_MAX) {
1172 fprintf(stderr, "pwqfilter: Input filter claims unsupported hash type.\n");
1173 return 2;
1174 }
1175
1176 if (header.hash_id != PASSWDQC_FILTER_HASH_OPAQUE) {
1177 uint32_t new_options = (options & ~OPT_HASH_ID_MASK) | ((uint32_t)header.hash_id << OPT_HASH_ID_SHIFT);
1178 if ((options & OPT_HASH_ALL) && new_options != options) {
1179 fprintf(stderr, "pwqfilter: Input filter's hash type is different than requested.\n");
1180 return 2;
1181 }
1182 options = new_options;
1183 }
1184 }
1185
1186 ssinit();
1187
1188 if (mode == MODE_LOOKUP) {
1189 int status = 1 - lookup_loop();
1190 if ((options & OPT_TEST_FP_RATE) && test_fp_rate())
1191 return 2;
1192 return status;
1193 }
1194
1195 if (likely(mode >= MODE_CREATE)) {
1196 /*
1197 * The weird combination of --pre-hashed and --hash* is allowed with --create
1198 * for writing the claimed hash type into the filter, but shouldn't result in
1199 * us hashing the hashes.
1200 */
1201 if (options & OPT_PRE_HASHED)
1202 options &= ~OPT_HASH_ALL;
1203
1204 if (insert_loop())
1205 return 2;
1206
1207 if (options & OPT_VERBOSE)
1208 print_status();
1209
1210 if (output && write_filter(output))
1211 return 2;
1212 }
1213
1214 if ((options & OPT_TEST_FP_RATE) && test_fp_rate())
1215 return 2;
1216
1217 return 0;
1218 }