1#![allow(clippy::too_many_arguments)]
5
6use alloc::vec;
7use alloc::vec::Vec;
8
9use crate::{
10 common::is_short_half,
11 fft,
12 fpr::*,
13 rng::{prng_get_u64, prng_get_u8, prng_init, Prng},
14 shake::InnerShake256Context,
15};
16
17fn ffldl_treesize(logn: u32) -> usize {
24 ((logn + 1) as usize) << logn
25}
26
27fn ffldl_fft_inner(tree: &mut [Fpr], g0: &mut [Fpr], g1: &mut [Fpr], logn: u32, tmp: &mut [Fpr]) {
33 let n: usize = 1 << logn;
34 if n == 1 {
35 tree[0] = g0[0];
36 return;
37 }
38 let hn = n >> 1;
39
40 fft::poly_ldlmv_fft(tmp, tree, g0, g1, g0, logn);
42
43 {
46 let (g1_lo, g1_hi) = g1.split_at_mut(hn);
47 fft::poly_split_fft(g1_lo, g1_hi, &g0[..n], logn);
48 }
49 {
50 let (g0_lo, g0_hi) = g0.split_at_mut(hn);
51 fft::poly_split_fft(g0_lo, g0_hi, &tmp[..n], logn);
52 }
53
54 {
56 let (g1_lo, g1_rest) = g1.split_at_mut(hn);
57 ffldl_fft_inner(&mut tree[n..], g1_lo, g1_rest, logn - 1, tmp);
58 }
59
60 let off = n + ffldl_treesize(logn - 1);
62 {
63 let (g0_lo, g0_rest) = g0.split_at_mut(hn);
64 ffldl_fft_inner(&mut tree[off..], g0_lo, g0_rest, logn - 1, tmp);
65 }
66}
67
68fn ffldl_fft(tree: &mut [Fpr], g00: &[Fpr], g01: &[Fpr], g11: &[Fpr], logn: u32, tmp: &mut [Fpr]) {
71 let n: usize = 1 << logn;
72 if n == 1 {
73 tree[0] = g00[0];
74 return;
75 }
76 let hn = n >> 1;
77
78 let d00 = &mut tmp[..n];
79 d00.copy_from_slice(&g00[..n]);
80
81 let (d00_slice, rest) = tmp.split_at_mut(n);
83 let (d11_slice, scratch) = rest.split_at_mut(n);
84
85 fft::poly_ldlmv_fft(d11_slice, tree, g00, g01, g11, logn);
86
87 {
89 let (s0, s1) = scratch.split_at_mut(hn);
90 fft::poly_split_fft(s0, s1, d00_slice, logn);
91 }
92 {
94 let (d0, d1) = d00_slice.split_at_mut(hn);
95 fft::poly_split_fft(d0, d1, d11_slice, logn);
96 }
97 d11_slice[..n].copy_from_slice(&scratch[..n]);
99
100 {
102 let (d11_lo, d11_hi) = d11_slice.split_at_mut(hn);
103 ffldl_fft_inner(&mut tree[n..], d11_lo, d11_hi, logn - 1, scratch);
104 }
105
106 let off = n + ffldl_treesize(logn - 1);
108 {
109 let (d00_lo, d00_hi) = d00_slice.split_at_mut(hn);
110 ffldl_fft_inner(&mut tree[off..], d00_lo, d00_hi, logn - 1, scratch);
111 }
112}
113
114fn ffldl_binary_normalize(tree: &mut [Fpr], orig_logn: u32, logn: u32) {
117 let n: usize = 1 << logn;
118 if n == 1 {
119 tree[0] = fpr_mul(fpr_sqrt(tree[0]), FPR_INV_SIGMA[orig_logn as usize]);
120 } else {
121 ffldl_binary_normalize(&mut tree[n..], orig_logn, logn - 1);
122 let off = n + ffldl_treesize(logn - 1);
123 ffldl_binary_normalize(&mut tree[off..], orig_logn, logn - 1);
124 }
125}
126
127fn smallints_to_fpr(r: &mut [Fpr], t: &[i8], logn: u32) {
134 let n: usize = 1 << logn;
135 for u in 0..n {
136 r[u] = fpr_of(t[u] as i64);
137 }
138}
139
140#[inline(always)]
142fn skoff_b00(_logn: u32) -> usize {
143 0
144}
145#[inline(always)]
146fn skoff_b01(logn: u32) -> usize {
147 1 << logn
148}
149#[inline(always)]
150fn skoff_b10(logn: u32) -> usize {
151 2 << logn
152}
153#[inline(always)]
154fn skoff_b11(logn: u32) -> usize {
155 3 << logn
156}
157#[inline(always)]
158fn skoff_tree(logn: u32) -> usize {
159 4 << logn
160}
161
162pub fn expand_privkey(
165 expanded_key: &mut [Fpr],
166 f: &[i8],
167 g: &[i8],
168 big_f: &[i8],
169 big_g: &[i8],
170 logn: u32,
171 tmp: &mut [u8],
172) {
173 let n: usize = 1 << logn;
174
175 let b00_off = skoff_b00(logn);
176 let b01_off = skoff_b01(logn);
177 let b10_off = skoff_b10(logn);
178 let b11_off = skoff_b11(logn);
179 let tree_off = skoff_tree(logn);
180
181 smallints_to_fpr(&mut expanded_key[b01_off..], f, logn);
183 smallints_to_fpr(&mut expanded_key[b00_off..], g, logn);
184 smallints_to_fpr(&mut expanded_key[b11_off..], big_f, logn);
185 smallints_to_fpr(&mut expanded_key[b10_off..], big_g, logn);
186
187 fft::fft(&mut expanded_key[b01_off..b01_off + n], logn);
188 fft::fft(&mut expanded_key[b00_off..b00_off + n], logn);
189 fft::fft(&mut expanded_key[b11_off..b11_off + n], logn);
190 fft::fft(&mut expanded_key[b10_off..b10_off + n], logn);
191 fft::poly_neg(&mut expanded_key[b01_off..b01_off + n], logn);
192 fft::poly_neg(&mut expanded_key[b11_off..b11_off + n], logn);
193
194 let ftmp: &mut [Fpr] = unsafe {
196 debug_assert!(
197 tmp.as_ptr() as usize % core::mem::align_of::<Fpr>() == 0,
198 "expand_privkey: tmp not Fpr-aligned"
199 );
200 core::slice::from_raw_parts_mut(
201 tmp.as_mut_ptr() as *mut Fpr,
202 tmp.len() / core::mem::size_of::<Fpr>(),
203 )
204 };
205
206 let (g00, rest) = ftmp.split_at_mut(n);
207 let (g01_g, rest) = rest.split_at_mut(n);
208 let (g11_g, gxx) = rest.split_at_mut(n);
209
210 g00.copy_from_slice(&expanded_key[b00_off..b00_off + n]);
211 fft::poly_mulselfadj_fft(g00, logn);
212 gxx[..n].copy_from_slice(&expanded_key[b01_off..b01_off + n]);
213 fft::poly_mulselfadj_fft(&mut gxx[..n], logn);
214 fft::poly_add(g00, &gxx[..n], logn);
215
216 g01_g.copy_from_slice(&expanded_key[b00_off..b00_off + n]);
217 fft::poly_muladj_fft(g01_g, &expanded_key[b10_off..b10_off + n], logn);
218 gxx[..n].copy_from_slice(&expanded_key[b01_off..b01_off + n]);
219 fft::poly_muladj_fft(&mut gxx[..n], &expanded_key[b11_off..b11_off + n], logn);
220 fft::poly_add(g01_g, &gxx[..n], logn);
221
222 g11_g.copy_from_slice(&expanded_key[b10_off..b10_off + n]);
223 fft::poly_mulselfadj_fft(g11_g, logn);
224 gxx[..n].copy_from_slice(&expanded_key[b11_off..b11_off + n]);
225 fft::poly_mulselfadj_fft(&mut gxx[..n], logn);
226 fft::poly_add(g11_g, &gxx[..n], logn);
227
228 ffldl_fft(&mut expanded_key[tree_off..], g00, g01_g, g11_g, logn, gxx);
229 ffldl_binary_normalize(&mut expanded_key[tree_off..], logn, logn);
230}
231
232pub struct SamplerContext {
238 pub p: Prng,
239 pub sigma_min: Fpr,
240}
241
242type SamplerZ = fn(&mut SamplerContext, Fpr, Fpr) -> i32;
244
245fn ff_sampling_fft_dyntree(
251 samp: SamplerZ,
252 samp_ctx: &mut SamplerContext,
253 t0: &mut [Fpr],
254 t1: &mut [Fpr],
255 g00: &mut [Fpr],
256 g01: &mut [Fpr],
257 g11: &mut [Fpr],
258 orig_logn: u32,
259 logn: u32,
260 tmp: &mut [Fpr],
261) {
262 if logn == 0 {
263 let leaf = fpr_mul(fpr_sqrt(g00[0]), FPR_INV_SIGMA[orig_logn as usize]);
264 t0[0] = fpr_of(samp(samp_ctx, t0[0], leaf) as i64);
265 t1[0] = fpr_of(samp(samp_ctx, t1[0], leaf) as i64);
266 return;
267 }
268
269 let n: usize = 1 << logn;
270 let hn = n >> 1;
271
272 fft::poly_ldl_fft(g00, g01, g11, logn);
274
275 {
277 let (t_lo, t_hi) = tmp.split_at_mut(hn);
278 fft::poly_split_fft(t_lo, t_hi, g00, logn);
279 }
280 g00[..n].copy_from_slice(&tmp[..n]);
281 {
282 let (t_lo, t_hi) = tmp.split_at_mut(hn);
283 fft::poly_split_fft(t_lo, t_hi, g11, logn);
284 }
285 g11[..n].copy_from_slice(&tmp[..n]);
286 tmp[..n].copy_from_slice(&g01[..n]);
287 g01[..hn].copy_from_slice(&g00[..hn]);
288 g01[hn..n].copy_from_slice(&g11[..hn]);
289
290 {
292 let z1 = &mut tmp[n..];
293 let (z1_lo, z1_hi_and_rest) = z1.split_at_mut(hn);
294 let (z1_hi, scratch) = z1_hi_and_rest.split_at_mut(hn);
295 fft::poly_split_fft(z1_lo, z1_hi, t1, logn);
296
297 let (g11_lo, g11_hi) = g11.split_at_mut(hn);
298 ff_sampling_fft_dyntree(
299 samp,
300 samp_ctx,
301 z1_lo,
302 z1_hi,
303 g11_lo,
304 g11_hi,
305 &mut g01[hn..],
306 orig_logn,
307 logn - 1,
308 scratch,
309 );
310
311 let mut z1_lo_copy = vec![FPR_ZERO; hn];
313 let mut z1_hi_copy = vec![FPR_ZERO; hn];
314 z1_lo_copy.copy_from_slice(z1_lo);
315 z1_hi_copy.copy_from_slice(z1_hi);
316
317 fft::poly_merge_fft(&mut scratch[..n], &z1_lo_copy, &z1_hi_copy, logn);
319
320 z1_lo.copy_from_slice(&t1[..hn]);
323 z1_hi.copy_from_slice(&t1[hn..n]);
324 let mut merged_copy = vec![FPR_ZERO; n];
326 merged_copy.copy_from_slice(&scratch[..n]);
327 t1[..n].copy_from_slice(&merged_copy);
328 }
329 {
331 let (l10, z1_full) = tmp.split_at_mut(n);
332 let _ = l10;
333 fft::poly_sub(&mut z1_full[..n], &t1[..n], logn);
334 }
335
336 {
339 let (l10, diff) = tmp.split_at_mut(n);
340 fft::poly_mul_fft(l10, &diff[..n], logn);
341 }
342 fft::poly_add(t0, &tmp[..n], logn);
343
344 {
346 let (z0, rest_tmp) = tmp.split_at_mut(n);
347 let (z0_lo, z0_hi) = z0.split_at_mut(hn);
348 fft::poly_split_fft(z0_lo, z0_hi, t0, logn);
349 let (g00_lo, g00_hi) = g00.split_at_mut(hn);
350 ff_sampling_fft_dyntree(
351 samp,
352 samp_ctx,
353 z0_lo,
354 z0_hi,
355 g00_lo,
356 g00_hi,
357 g01,
358 orig_logn,
359 logn - 1,
360 rest_tmp,
361 );
362 let mut z0_lo_copy = vec![FPR_ZERO; hn];
363 let mut z0_hi_copy = vec![FPR_ZERO; hn];
364 z0_lo_copy.copy_from_slice(z0_lo);
365 z0_hi_copy.copy_from_slice(z0_hi);
366 fft::poly_merge_fft(t0, &z0_lo_copy, &z0_hi_copy, logn);
367 }
368}
369
370fn ff_sampling_fft(
377 samp: SamplerZ,
378 samp_ctx: &mut SamplerContext,
379 z0: &mut [Fpr],
380 z1: &mut [Fpr],
381 tree: &[Fpr],
382 t0: &[Fpr],
383 t1: &[Fpr],
384 logn: u32,
385 tmp: &mut [Fpr],
386) {
387 if logn == 2 {
389 let tree0 = &tree[4..];
390 let tree1 = &tree[8..];
391
392 let a_re = t1[0];
394 let a_im = t1[2];
395 let b_re = t1[1];
396 let b_im = t1[3];
397 let c_re = fpr_add(a_re, b_re);
398 let c_im = fpr_add(a_im, b_im);
399 let mut w0 = fpr_half(c_re);
400 let mut w1 = fpr_half(c_im);
401 let c_re = fpr_sub(a_re, b_re);
402 let c_im = fpr_sub(a_im, b_im);
403 let mut w2 = fpr_mul(fpr_add(c_re, c_im), FPR_INVSQRT8);
404 let mut w3 = fpr_mul(fpr_sub(c_im, c_re), FPR_INVSQRT8);
405
406 let x0 = w2;
407 let x1 = w3;
408 let sigma = tree1[3];
409 w2 = fpr_of(samp(samp_ctx, x0, sigma) as i64);
410 w3 = fpr_of(samp(samp_ctx, x1, sigma) as i64);
411 let a_re = fpr_sub(x0, w2);
412 let a_im = fpr_sub(x1, w3);
413 let b_re = tree1[0];
414 let b_im = tree1[1];
415 let c_re = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im));
416 let c_im = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re));
417 let x0 = fpr_add(c_re, w0);
418 let x1 = fpr_add(c_im, w1);
419 let sigma = tree1[2];
420 w0 = fpr_of(samp(samp_ctx, x0, sigma) as i64);
421 w1 = fpr_of(samp(samp_ctx, x1, sigma) as i64);
422
423 let a_re = w0;
424 let a_im = w1;
425 let c_re = fpr_mul(fpr_sub(w2, w3), FPR_INVSQRT2);
426 let c_im = fpr_mul(fpr_add(w2, w3), FPR_INVSQRT2);
427 z1[0] = fpr_add(a_re, c_re);
428 z1[2] = fpr_add(a_im, c_im);
429 z1[1] = fpr_sub(a_re, c_re);
430 z1[3] = fpr_sub(a_im, c_im);
431
432 w0 = fpr_sub(t1[0], z1[0]);
434 w1 = fpr_sub(t1[1], z1[1]);
435 w2 = fpr_sub(t1[2], z1[2]);
436 w3 = fpr_sub(t1[3], z1[3]);
437
438 {
439 let (a_re, a_im) = (w0, w2);
440 let (b_re, b_im) = (tree[0], tree[2]);
441 w0 = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im));
442 w2 = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re));
443 }
444 {
445 let (a_re, a_im) = (w1, w3);
446 let (b_re, b_im) = (tree[1], tree[3]);
447 w1 = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im));
448 w3 = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re));
449 }
450
451 w0 = fpr_add(w0, t0[0]);
452 w1 = fpr_add(w1, t0[1]);
453 w2 = fpr_add(w2, t0[2]);
454 w3 = fpr_add(w3, t0[3]);
455
456 let a_re = w0;
458 let a_im = w2;
459 let b_re = w1;
460 let b_im = w3;
461 let c_re = fpr_add(a_re, b_re);
462 let c_im = fpr_add(a_im, b_im);
463 w0 = fpr_half(c_re);
464 w1 = fpr_half(c_im);
465 let c_re = fpr_sub(a_re, b_re);
466 let c_im = fpr_sub(a_im, b_im);
467 w2 = fpr_mul(fpr_add(c_re, c_im), FPR_INVSQRT8);
468 w3 = fpr_mul(fpr_sub(c_im, c_re), FPR_INVSQRT8);
469
470 let x0 = w2;
471 let x1 = w3;
472 let sigma = tree0[3];
473 let y0 = fpr_of(samp(samp_ctx, x0, sigma) as i64);
474 let y1 = fpr_of(samp(samp_ctx, x1, sigma) as i64);
475 w2 = y0;
476 w3 = y1;
477 let a_re = fpr_sub(x0, y0);
478 let a_im = fpr_sub(x1, y1);
479 let b_re = tree0[0];
480 let b_im = tree0[1];
481 let c_re = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im));
482 let c_im = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re));
483 let x0 = fpr_add(c_re, w0);
484 let x1 = fpr_add(c_im, w1);
485 let sigma = tree0[2];
486 w0 = fpr_of(samp(samp_ctx, x0, sigma) as i64);
487 w1 = fpr_of(samp(samp_ctx, x1, sigma) as i64);
488
489 let a_re = w0;
490 let a_im = w1;
491 let c_re = fpr_mul(fpr_sub(w2, w3), FPR_INVSQRT2);
492 let c_im = fpr_mul(fpr_add(w2, w3), FPR_INVSQRT2);
493 z0[0] = fpr_add(a_re, c_re);
494 z0[2] = fpr_add(a_im, c_im);
495 z0[1] = fpr_sub(a_re, c_re);
496 z0[3] = fpr_sub(a_im, c_im);
497
498 return;
499 }
500
501 if logn == 1 {
503 let x0 = t1[0];
504 let x1 = t1[1];
505 let sigma = tree[3];
506 let y0 = fpr_of(samp(samp_ctx, x0, sigma) as i64);
507 let y1 = fpr_of(samp(samp_ctx, x1, sigma) as i64);
508 z1[0] = y0;
509 z1[1] = y1;
510 let a_re = fpr_sub(x0, y0);
511 let a_im = fpr_sub(x1, y1);
512 let b_re = tree[0];
513 let b_im = tree[1];
514 let c_re = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im));
515 let c_im = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re));
516 let x0 = fpr_add(c_re, t0[0]);
517 let x1 = fpr_add(c_im, t0[1]);
518 let sigma = tree[2];
519 z0[0] = fpr_of(samp(samp_ctx, x0, sigma) as i64);
520 z0[1] = fpr_of(samp(samp_ctx, x1, sigma) as i64);
521 return;
522 }
523
524 let n: usize = 1 << logn;
526 let hn = n >> 1;
527 let tree0 = &tree[n..];
528 let tree1 = &tree[n + ffldl_treesize(logn - 1)..];
529
530 {
532 let (z1_lo, z1_hi) = z1.split_at_mut(hn);
533 fft::poly_split_fft(z1_lo, z1_hi, t1, logn);
534 }
535
536 {
538 let (tmp_lo, tmp_rest) = tmp.split_at_mut(hn);
539 let (tmp_hi, scratch) = tmp_rest.split_at_mut(hn);
540 let (z1_lo, z1_hi) = z1.split_at_mut(hn);
541 ff_sampling_fft(
542 samp,
543 samp_ctx,
544 tmp_lo,
545 tmp_hi,
546 tree1,
547 z1_lo,
548 z1_hi,
549 logn - 1,
550 scratch,
551 );
552 }
553 {
554 let mut tmp_lo_copy = vec![FPR_ZERO; hn];
555 let mut tmp_hi_copy = vec![FPR_ZERO; hn];
556 tmp_lo_copy.copy_from_slice(&tmp[..hn]);
557 tmp_hi_copy.copy_from_slice(&tmp[hn..n]);
558 fft::poly_merge_fft(z1, &tmp_lo_copy, &tmp_hi_copy, logn);
559 }
560
561 tmp[..n].copy_from_slice(&t1[..n]);
563 fft::poly_sub(&mut tmp[..n], &z1[..n], logn);
564 fft::poly_mul_fft(&mut tmp[..n], tree, logn);
565 fft::poly_add(&mut tmp[..n], t0, logn);
566
567 {
569 let (z0_lo, z0_hi) = z0.split_at_mut(hn);
570 fft::poly_split_fft(z0_lo, z0_hi, &tmp[..n], logn);
571 }
572 {
573 let (tmp_lo, tmp_rest) = tmp.split_at_mut(hn);
574 let (tmp_hi, scratch) = tmp_rest.split_at_mut(hn);
575 let (z0_lo, z0_hi) = z0.split_at_mut(hn);
576 ff_sampling_fft(
577 samp,
578 samp_ctx,
579 tmp_lo,
580 tmp_hi,
581 tree0,
582 z0_lo,
583 z0_hi,
584 logn - 1,
585 scratch,
586 );
587 }
588 {
589 let mut tmp_lo_copy = vec![FPR_ZERO; hn];
590 let mut tmp_hi_copy = vec![FPR_ZERO; hn];
591 tmp_lo_copy.copy_from_slice(&tmp[..hn]);
592 tmp_hi_copy.copy_from_slice(&tmp[hn..n]);
593 fft::poly_merge_fft(z0, &tmp_lo_copy, &tmp_hi_copy, logn);
594 }
595}
596
597fn do_sign_tree(
604 samp: SamplerZ,
605 samp_ctx: &mut SamplerContext,
606 s2: &mut [i16],
607 expanded_key: &[Fpr],
608 hm: &[u16],
609 logn: u32,
610 tmp: &mut [Fpr],
611) -> bool {
612 let n: usize = 1 << logn;
613
614 let b00 = &expanded_key[skoff_b00(logn)..];
615 let b01 = &expanded_key[skoff_b01(logn)..];
616 let b10 = &expanded_key[skoff_b10(logn)..];
617 let b11 = &expanded_key[skoff_b11(logn)..];
618 let tree = &expanded_key[skoff_tree(logn)..];
619
620 for u in 0..n {
622 tmp[u] = fpr_of(hm[u] as i64);
623 }
624
625 fft::fft(&mut tmp[0..n], logn);
627 let ni = FPR_INVERSE_OF_Q;
628 debug_assert!(tmp.len() >= 4 * n, "do_sign_tree: tmp too small");
629 unsafe {
630 let p = tmp.as_mut_ptr();
631 core::ptr::copy(p, p.add(n), n);
632 }
633 fft::poly_mul_fft(&mut tmp[n..2 * n], &b01[..n], logn);
634 fft::poly_mulconst(&mut tmp[n..2 * n], fpr_neg(ni), logn);
635 fft::poly_mul_fft(&mut tmp[0..n], &b11[..n], logn);
636 fft::poly_mulconst(&mut tmp[0..n], ni, logn);
637
638 {
641 let ptr = tmp.as_mut_ptr();
644 debug_assert!(tmp.len() >= 5 * n, "do_sign_tree: tmp too small for sampling");
645 let t0 = unsafe { core::slice::from_raw_parts(ptr, n) };
646 let t1 = unsafe { core::slice::from_raw_parts(ptr.add(n), n) };
647 let tx = unsafe { core::slice::from_raw_parts_mut(ptr.add(2 * n), n) };
648 let ty = unsafe { core::slice::from_raw_parts_mut(ptr.add(3 * n), n) };
649 let scratch = unsafe { core::slice::from_raw_parts_mut(ptr.add(4 * n), tmp.len() - 4 * n) };
650 ff_sampling_fft(samp, samp_ctx, tx, ty, tree, t0, t1, logn, scratch);
651 }
652
653 {
656 let ptr = tmp.as_mut_ptr();
657 unsafe {
658 core::ptr::copy_nonoverlapping(ptr.add(2 * n), ptr, n);
660 core::ptr::copy_nonoverlapping(ptr.add(3 * n), ptr.add(n), n);
662 }
663 }
664 fft::poly_mul_fft(&mut tmp[2 * n..3 * n], &b00[..n], logn);
666 fft::poly_mul_fft(&mut tmp[3 * n..4 * n], &b10[..n], logn);
668 {
670 let (front, back) = tmp.split_at_mut(3 * n);
671 fft::poly_add(&mut front[2 * n..], &back[..n], logn);
672 }
673 {
675 let ptr = tmp.as_mut_ptr();
676 unsafe {
677 core::ptr::copy_nonoverlapping(ptr, ptr.add(3 * n), n);
678 }
679 }
680 fft::poly_mul_fft(&mut tmp[3 * n..4 * n], &b01[..n], logn);
681
682 {
684 let ptr = tmp.as_mut_ptr();
685 unsafe {
686 core::ptr::copy_nonoverlapping(ptr.add(2 * n), ptr, n);
687 }
688 }
689 fft::poly_mul_fft(&mut tmp[n..2 * n], &b11[..n], logn);
691 {
693 let (front, back) = tmp.split_at_mut(3 * n);
694 fft::poly_add(&mut front[n..2 * n], &back[..n], logn);
695 }
696
697 fft::ifft(&mut tmp[0..n], logn);
698 fft::ifft(&mut tmp[n..2 * n], logn);
699
700 let s1tmp: &mut [i16] =
702 unsafe { core::slice::from_raw_parts_mut(tmp[2 * n..].as_mut_ptr() as *mut i16, n) };
703 let mut sqn: u32 = 0;
704 let mut ng: u32 = 0;
705 for u in 0..n {
706 let z = (hm[u] as i32) - (fpr_rint(tmp[u]) as i32);
707 sqn = sqn.wrapping_add((z * z) as u32);
708 ng |= sqn;
709 s1tmp[u] = z as i16;
710 }
711 sqn |= (ng >> 31).wrapping_neg();
712
713 let mut s2_vals: Vec<i16> = Vec::with_capacity(n);
715 for u in 0..n {
716 s2_vals.push(-(fpr_rint(tmp[n + u]) as i16));
717 }
718
719 if is_short_half(sqn, &s2_vals, logn) {
720 s2[..n].copy_from_slice(&s2_vals);
721 let s1_out: &mut [i16] =
723 unsafe { core::slice::from_raw_parts_mut(tmp.as_mut_ptr() as *mut i16, n) };
724 s1_out[..n].copy_from_slice(&s1tmp[..n]);
725 return true;
726 }
727 false
728}
729
730fn do_sign_dyn(
740 samp: SamplerZ,
741 samp_ctx: &mut SamplerContext,
742 s2: &mut [i16],
743 f: &[i8],
744 g: &[i8],
745 big_f: &[i8],
746 big_g: &[i8],
747 hm: &[u16],
748 logn: u32,
749 tmp: &mut [Fpr],
750) -> bool {
751 let n: usize = 1 << logn;
752 let ptr = tmp.as_mut_ptr();
753 debug_assert!(tmp.len() >= 9 * n, "do_sign_dyn: tmp too small");
754
755 {
758 let b00 = unsafe { core::slice::from_raw_parts_mut(ptr, n) };
759 let b01 = unsafe { core::slice::from_raw_parts_mut(ptr.add(n), n) };
760 let b10 = unsafe { core::slice::from_raw_parts_mut(ptr.add(2 * n), n) };
761 let b11 = unsafe { core::slice::from_raw_parts_mut(ptr.add(3 * n), n) };
762
763 smallints_to_fpr(b01, f, logn);
764 smallints_to_fpr(b00, g, logn);
765 smallints_to_fpr(b11, big_f, logn);
766 smallints_to_fpr(b10, big_g, logn);
767 fft::fft(b01, logn);
768 fft::fft(b00, logn);
769 fft::fft(b11, logn);
770 fft::fft(b10, logn);
771 fft::poly_neg(b01, logn);
772 fft::poly_neg(b11, logn);
773 }
774
775 {
778 let b00 = unsafe { core::slice::from_raw_parts_mut(ptr, n) };
779 let b01 = unsafe { core::slice::from_raw_parts_mut(ptr.add(n), n) };
780 let b10 = unsafe { core::slice::from_raw_parts_mut(ptr.add(2 * n), n) };
781 let b11 = unsafe { core::slice::from_raw_parts_mut(ptr.add(3 * n), n) };
782 let t0 = unsafe { core::slice::from_raw_parts_mut(ptr.add(4 * n), n) };
783 let t1 = unsafe { core::slice::from_raw_parts_mut(ptr.add(5 * n), n) };
784
785 t0.copy_from_slice(b01);
787 fft::poly_mulselfadj_fft(t0, logn);
788
789 t1.copy_from_slice(b00);
791 fft::poly_muladj_fft(t1, b10, logn);
792
793 fft::poly_mulselfadj_fft(b00, logn);
795 fft::poly_add(b00, t0, logn);
796
797 t0.copy_from_slice(b01);
799 fft::poly_muladj_fft(b01, b11, logn);
800 fft::poly_add(b01, t1, logn);
801
802 fft::poly_mulselfadj_fft(b10, logn);
804 t1.copy_from_slice(b11);
805 fft::poly_mulselfadj_fft(t1, logn);
806 fft::poly_add(b10, t1, logn);
807 }
808
809 {
815 let t0 = unsafe { core::slice::from_raw_parts_mut(ptr.add(5 * n), n) };
816 for u in 0..n {
817 t0[u] = fpr_of(hm[u] as i64);
818 }
819 }
820
821 {
823 let t0 = unsafe { core::slice::from_raw_parts_mut(ptr.add(5 * n), n) };
824 let t1 = unsafe { core::slice::from_raw_parts_mut(ptr.add(6 * n), n) };
825 let b01_saved = unsafe { core::slice::from_raw_parts(ptr.add(4 * n), n) };
826 let b11 = unsafe { core::slice::from_raw_parts(ptr.add(3 * n), n) };
827
828 fft::fft(t0, logn);
829 let ni = FPR_INVERSE_OF_Q;
830 t1.copy_from_slice(t0);
831 fft::poly_mul_fft(t1, b01_saved, logn);
832 fft::poly_mulconst(t1, fpr_neg(ni), logn);
833 fft::poly_mul_fft(t0, b11, logn);
834 fft::poly_mulconst(t0, ni, logn);
835 }
836
837 unsafe {
839 core::ptr::copy(ptr.add(5 * n), ptr.add(3 * n), 2 * n);
840 }
841
842 {
847 let g00 = unsafe { core::slice::from_raw_parts_mut(ptr, n) };
848 let g01 = unsafe { core::slice::from_raw_parts_mut(ptr.add(n), n) };
849 let g11 = unsafe { core::slice::from_raw_parts_mut(ptr.add(2 * n), n) };
850 let t0 = unsafe { core::slice::from_raw_parts_mut(ptr.add(3 * n), n) };
851 let t1 = unsafe { core::slice::from_raw_parts_mut(ptr.add(4 * n), n) };
852 let scratch = unsafe { core::slice::from_raw_parts_mut(ptr.add(5 * n), tmp.len() - 5 * n) };
853 ff_sampling_fft_dyntree(samp, samp_ctx, t0, t1, g00, g01, g11, logn, logn, scratch);
854 }
855
856 unsafe {
859 core::ptr::copy(ptr.add(3 * n), ptr.add(5 * n), 2 * n);
860 }
861
862 {
864 let b00 = unsafe { core::slice::from_raw_parts_mut(ptr, n) };
865 let b01 = unsafe { core::slice::from_raw_parts_mut(ptr.add(n), n) };
866 let b10 = unsafe { core::slice::from_raw_parts_mut(ptr.add(2 * n), n) };
867 let b11 = unsafe { core::slice::from_raw_parts_mut(ptr.add(3 * n), n) };
868
869 smallints_to_fpr(b01, f, logn);
870 smallints_to_fpr(b00, g, logn);
871 smallints_to_fpr(b11, big_f, logn);
872 smallints_to_fpr(b10, big_g, logn);
873 fft::fft(b01, logn);
874 fft::fft(b00, logn);
875 fft::fft(b11, logn);
876 fft::fft(b10, logn);
877 fft::poly_neg(b01, logn);
878 fft::poly_neg(b11, logn);
879 }
880
881 unsafe {
884 core::ptr::copy_nonoverlapping(ptr.add(5 * n), ptr.add(7 * n), n);
885 core::ptr::copy_nonoverlapping(ptr.add(6 * n), ptr.add(8 * n), n);
886 }
887
888 {
890 let b00 = unsafe { core::slice::from_raw_parts(ptr, n) };
891 let b01 = unsafe { core::slice::from_raw_parts(ptr.add(n), n) };
892 let b10 = unsafe { core::slice::from_raw_parts(ptr.add(2 * n), n) };
893 let _b11 = unsafe { core::slice::from_raw_parts(ptr.add(3 * n), n) };
894 let tx = unsafe { core::slice::from_raw_parts_mut(ptr.add(7 * n), n) };
895 let ty = unsafe { core::slice::from_raw_parts_mut(ptr.add(8 * n), n) };
896
897 fft::poly_mul_fft(tx, b00, logn);
898 fft::poly_mul_fft(ty, b10, logn);
899 fft::poly_add(tx, ty, logn);
900
901 let t0_slice = unsafe { core::slice::from_raw_parts(ptr.add(5 * n), n) };
903 ty.copy_from_slice(t0_slice);
904 fft::poly_mul_fft(ty, b01, logn);
905 }
906
907 unsafe {
909 core::ptr::copy_nonoverlapping(ptr.add(7 * n), ptr.add(5 * n), n);
910 }
911
912 {
914 let t1 = unsafe { core::slice::from_raw_parts_mut(ptr.add(6 * n), n) };
915 let b11 = unsafe { core::slice::from_raw_parts(ptr.add(3 * n), n) };
916 fft::poly_mul_fft(t1, b11, logn);
917 }
918
919 {
921 let t1 = unsafe { core::slice::from_raw_parts_mut(ptr.add(6 * n), n) };
922 let ty = unsafe { core::slice::from_raw_parts(ptr.add(8 * n), n) };
923 fft::poly_add(t1, ty, logn);
924 }
925
926 {
928 let t0 = unsafe { core::slice::from_raw_parts_mut(ptr.add(5 * n), n) };
929 fft::ifft(t0, logn);
930 }
931 {
932 let t1 = unsafe { core::slice::from_raw_parts_mut(ptr.add(6 * n), n) };
933 fft::ifft(t1, logn);
934 }
935
936 let s1tmp: &mut [i16] =
938 unsafe { core::slice::from_raw_parts_mut(ptr.add(7 * n) as *mut i16, n) };
939 let mut sqn: u32 = 0;
940 let mut ng: u32 = 0;
941 for u in 0..n {
942 let t0_u = unsafe { *ptr.add(5 * n + u) };
943 let z = (hm[u] as i32) - (fpr_rint(t0_u) as i32);
944 sqn = sqn.wrapping_add((z * z) as u32);
945 ng |= sqn;
946 s1tmp[u] = z as i16;
947 }
948 sqn |= (ng >> 31).wrapping_neg();
949
950 let mut s2_vals: Vec<i16> = Vec::with_capacity(n);
951 for u in 0..n {
952 let t1_u = unsafe { *ptr.add(6 * n + u) };
953 s2_vals.push(-(fpr_rint(t1_u) as i16));
954 }
955
956 if is_short_half(sqn, &s2_vals, logn) {
957 s2[..n].copy_from_slice(&s2_vals);
958 let s1_out: &mut [i16] = unsafe { core::slice::from_raw_parts_mut(ptr as *mut i16, n) };
959 s1_out[..n].copy_from_slice(&s1tmp[..n]);
960 return true;
961 }
962 false
963}
964
965static GAUSS0_DIST: [u32; 54] = [
971 10745844, 3068844, 3741698, 5559083, 1580863, 8248194, 2260429, 13669192, 2736639, 708981,
972 4421575, 10046180, 169348, 7122675, 4136815, 30538, 13063405, 7650655, 4132, 14505003, 7826148,
973 417, 16768101, 11363290, 31, 8444042, 8086568, 1, 12844466, 265321, 0, 1232676, 13644283, 0,
974 38047, 9111839, 0, 870, 6138264, 0, 14, 12545723, 0, 0, 3104126, 0, 0, 28824, 0, 0, 198, 0, 0,
975 1,
976];
977
978pub fn gaussian0_sampler(p: &mut Prng) -> i32 {
981 let lo = prng_get_u64(p);
982 let hi = prng_get_u8(p);
983 let v0 = (lo as u32) & 0xFFFFFF;
984 let v1 = ((lo >> 24) as u32) & 0xFFFFFF;
985 let v2 = ((lo >> 48) as u32) | (hi << 16);
986
987 let mut z: i32 = 0;
988 let mut u = 0;
989 while u < GAUSS0_DIST.len() {
990 unsafe {
991 let w0 = *GAUSS0_DIST.get_unchecked(u + 2);
992 let w1 = *GAUSS0_DIST.get_unchecked(u + 1);
993 let w2 = *GAUSS0_DIST.get_unchecked(u);
994 let cc = v0.wrapping_sub(w0) >> 31;
995 let cc = v1.wrapping_sub(w1).wrapping_sub(cc) >> 31;
996 let cc = v2.wrapping_sub(w2).wrapping_sub(cc) >> 31;
997 z += cc as i32;
998 }
999 u += 3;
1000 }
1001 z
1002}
1003
1004fn ber_exp(p: &mut Prng, x: Fpr, ccs: Fpr) -> bool {
1006 let s = fpr_trunc(fpr_mul(x, FPR_INV_LOG2)) as i32;
1007 let r = fpr_sub(x, fpr_mul(fpr_of(s as i64), FPR_LOG2));
1008
1009 let mut sw = s as u32;
1011 sw ^= (sw ^ 63) & (63u32.wrapping_sub(sw) >> 31).wrapping_neg();
1012 let s = sw as i32;
1013
1014 let z = ((fpr_expm_p63(r, ccs) << 1).wrapping_sub(1)) >> (s as u32);
1015
1016 let mut i: i32 = 64;
1017 let mut w: u32;
1018 loop {
1019 i -= 8;
1020 w = prng_get_u8(p).wrapping_sub(((z >> (i as u32)) & 0xFF) as u32);
1021 if w != 0 || i <= 0 {
1022 break;
1023 }
1024 }
1025 (w >> 31) != 0
1026}
1027
1028pub fn sampler(ctx: &mut SamplerContext, mu: Fpr, isigma: Fpr) -> i32 {
1031 let s = fpr_floor(mu) as i32;
1032 let r = fpr_sub(mu, fpr_of(s as i64));
1033 let dss = fpr_half(fpr_sqr(isigma));
1034 let ccs = fpr_mul(isigma, ctx.sigma_min);
1035
1036 for _ in 0..1000 {
1039 let z0 = gaussian0_sampler(&mut ctx.p);
1040 let b = (prng_get_u8(&mut ctx.p) & 1) as i32;
1041 let z = b + ((b << 1) - 1) * z0;
1042
1043 let x = fpr_mul(fpr_sqr(fpr_sub(fpr_of(z as i64), r)), dss);
1044 let x = fpr_sub(x, fpr_mul(fpr_of((z0 * z0) as i64), FPR_INV_2SQRSIGMA0));
1045 if ber_exp(&mut ctx.p, x, ccs) {
1046 return s + z;
1047 }
1048 }
1049 panic!("PRNG corruption: discrete Gaussian sampler failed after 1000 iterations")
1051}
1052
1053pub fn sign_tree(
1059 sig: &mut [i16],
1060 rng: &mut InnerShake256Context,
1061 expanded_key: &[Fpr],
1062 hm: &[u16],
1063 logn: u32,
1064 tmp: &mut [u8],
1065) {
1066 let ftmp: &mut [Fpr] = unsafe {
1067 debug_assert!(
1068 tmp.as_ptr() as usize % core::mem::align_of::<Fpr>() == 0
1069 || tmp.len() / core::mem::size_of::<Fpr>() == 0,
1070 "sign_tree: tmp not Fpr-aligned"
1071 );
1072 core::slice::from_raw_parts_mut(
1073 tmp.as_mut_ptr() as *mut Fpr,
1074 tmp.len() / core::mem::size_of::<Fpr>(),
1075 )
1076 };
1077 loop {
1078 let mut spc = SamplerContext {
1079 p: Prng::new(),
1080 sigma_min: FPR_SIGMA_MIN[logn as usize],
1081 };
1082 prng_init(&mut spc.p, rng);
1083
1084 if do_sign_tree(sampler, &mut spc, sig, expanded_key, hm, logn, ftmp) {
1085 break;
1086 }
1087 }
1088}
1089
1090pub fn sign_dyn(
1092 sig: &mut [i16],
1093 rng: &mut InnerShake256Context,
1094 f: &[i8],
1095 g: &[i8],
1096 big_f: &[i8],
1097 big_g: &[i8],
1098 hm: &[u16],
1099 logn: u32,
1100 tmp: &mut [u8],
1101) {
1102 let ftmp: &mut [Fpr] = unsafe {
1103 debug_assert!(
1104 tmp.as_ptr() as usize % core::mem::align_of::<Fpr>() == 0
1105 || tmp.len() / core::mem::size_of::<Fpr>() == 0,
1106 "sign_dyn: tmp not Fpr-aligned"
1107 );
1108 core::slice::from_raw_parts_mut(
1109 tmp.as_mut_ptr() as *mut Fpr,
1110 tmp.len() / core::mem::size_of::<Fpr>(),
1111 )
1112 };
1113 loop {
1114 let mut spc = SamplerContext {
1115 p: Prng::new(),
1116 sigma_min: FPR_SIGMA_MIN[logn as usize],
1117 };
1118 prng_init(&mut spc.p, rng);
1119
1120 if do_sign_dyn(sampler, &mut spc, sig, f, g, big_f, big_g, hm, logn, ftmp) {
1121 break;
1122 }
1123 }
1124}