scrypt_opt/
lib.rs

1#![doc = include_str!("../README.md")]
2#![cfg_attr(
3    all(not(test), not(feature = "std"), not(target_arch = "wasm32")),
4    no_std
5)]
6#![cfg_attr(feature = "portable-simd", feature(portable_simd))]
7#![warn(missing_docs)]
8
9#[cfg(feature = "alloc")]
10extern crate alloc;
11
// Unrolls `$body` four times, binding `$index` to the literals 0, 1, 2 and 3 in turn.
// Every copy sits in its own block, so the binding never leaks into the next copy or
// into the caller's scope. The expansion is a sequence of statements, so the macro must
// be invoked in statement position.
#[rustfmt::skip]
macro_rules! repeat4 {
    ($index:ident, $body:block) => {
        { let $index = 0; $body; }
        { let $index = 1; $body; }
        { let $index = 2; $body; }
        { let $index = 3; $body; }
    };
}
21
// Unrolls `$body` eight times with `$i` bound to 0..=7, built from two `repeat4!`
// expansions: the first covers 0..=3 directly, the second offsets its counter by 4.
// Unlike `repeat4!`, the expansion is wrapped in a single outer block.
#[rustfmt::skip]
macro_rules! repeat8 {
    ($i:ident, $body:block) => {{
        repeat4!(lo, { let $i = lo; $body });
        repeat4!(hi, { let $i = hi + 4; $body });
    }};
}
29
30/// Re-export sha2
31pub use sha2;
32
33/// Re-export generic_array
34pub use generic_array;
35
36/// Algorithmic Self-Test (CAST)
37pub mod self_test;
38
39/// Memory utilities
40pub mod memory;
41
42/// Salsa20 kernels
43pub mod salsa20;
44
45/// SIMD utilities
46pub(crate) mod simd;
47
48/// PBKDF2-HMAC-SHA256 implementation (1 iteration special case)
49pub mod pbkdf2_1;
50
51/// Pipeline support
52pub mod pipeline;
53
54/// Multi-buffer SHA256 implementation
55#[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
56pub(crate) mod sha2_mb;
57
58/// Runtime feature detection
59pub mod features;
60
61/// Compat APIs
62#[cfg(any(feature = "std", target_arch = "wasm32"))]
63pub mod compat;
64
65/// Fixed R buffer set
66pub mod fixed_r;
67
68use core::num::{NonZeroU8, NonZeroU32};
69
70use generic_array::typenum::{
71    B1, IsLess, IsLessOrEqual, PowerOfTwo, U1, U2, U3, U4, U5, U6, U7, U8, U9, U10, U11, U12, U13,
72    U14, U15, U16, U17, U18, U19, U20, U21, U22, U23, U24, U25, U26, U27, U28, U29, U30, U31,
73    U4294967296, Unsigned,
74};
75
76use generic_array::{ArrayLength, GenericArray, typenum::NonZero};
77
78#[allow(unused_imports)]
79use crate::features::Feature as _;
80
81use crate::memory::Align64;
82use crate::salsa20::{BlockType, Salsa20};
83
84include!("block_mix.rs");
85
86// rough order:
87// 1. kernels that I know is optimal
88// 2. portable simd
89// 3. kernels that should work better than scalar
90// 4. scalar
91#[cfg(target_arch = "x86_64")]
92cfg_if::cfg_if! {
93    if #[cfg(target_feature = "avx512f")] {
94        /// The default engine for this architecture that is guaranteed to be available
95        pub type DefaultEngine1 = salsa20::x86_64::BlockAvx512F;
96        /// The default engine for this architecture that is guaranteed to be available
97        pub type DefaultEngine2 = salsa20::x86_64::BlockAvx512FMb2;
98    } else if #[cfg(target_feature = "avx2")] {
99        /// The default engine for this architecture that is guaranteed to be available
100        pub type DefaultEngine1 = salsa20::x86_64::BlockSse2<U1>;
101        /// The default engine for this architecture that is guaranteed to be available
102        pub type DefaultEngine2 = salsa20::x86_64::BlockAvx2Mb2;
103    } else if #[cfg(feature = "portable-simd")] {
104        /// The default engine for this architecture that is guaranteed to be available
105        pub type DefaultEngine1 = salsa20::BlockPortableSimd;
106        /// The default engine for this architecture that is guaranteed to be available
107        pub type DefaultEngine2 = salsa20::BlockPortableSimd2;
108    } else if #[cfg(target_arch = "x86_64")] {
109        /// The default engine for this architecture that is guaranteed to be available
110        pub type DefaultEngine1 = salsa20::x86_64::BlockSse2<U1>;
111        /// The default engine for this architecture that is guaranteed to be available
112        pub type DefaultEngine2 = salsa20::x86_64::BlockSse2<U2>;
113    } else {
114        /// The default engine for this architecture that is guaranteed to be available
115        pub type DefaultEngine1 = salsa20::BlockScalar<U1>;
116        /// The default engine for this architecture that is guaranteed to be available
117        pub type DefaultEngine2 = salsa20::BlockScalar<U2>;
118    }
119}
120
// Engine selection for every non-x86_64 target: the scalar kernels unless the
// `portable-simd` feature is enabled, in which case the portable SIMD kernels are used.
#[cfg(not(target_arch = "x86_64"))]
cfg_if::cfg_if! {
    if #[cfg(not(feature = "portable-simd"))] {
        /// The default single-lane (`Lanes = U1`) engine for this target, guaranteed to be available
        pub type DefaultEngine1 = salsa20::BlockScalar<U1>;
        /// The default two-lane (`Lanes = U2`) engine for this target, guaranteed to be available
        pub type DefaultEngine2 = salsa20::BlockScalar<U2>;
    } else {
        /// The default single-lane (`Lanes = U1`) engine for this target, guaranteed to be available
        pub type DefaultEngine1 = salsa20::BlockPortableSimd;
        /// The default two-lane (`Lanes = U2`) engine for this target, guaranteed to be available
        pub type DefaultEngine2 = salsa20::BlockPortableSimd2;
    }
}
135
// Private home of the sealing trait. `Sealed` is a supertrait of `ValidCostFactor`; because
// this module is not `pub`, code outside the crate cannot name `Sealed` and therefore cannot
// add its own `ValidCostFactor` implementations.
mod sealing {
    /// Marker trait with no methods; implemented only for the cost factors listed in this file.
    pub trait Sealed {}
}
139
/// A trait for valid cost factors
///
/// A cost factor is the base-2 logarithm of scrypt's `N` parameter, expressed as a
/// type-level unsigned integer. The trait is sealed, so it cannot be implemented outside
/// this crate; the implementations in this file cover `U1` through `U31`.
pub trait ValidCostFactor: Unsigned + NonZero + sealing::Sealed {
    /// The output type
    ///
    /// For the implementations in this file this is `1 << CF`, i.e. `N` itself; it must be
    /// a non-zero power of two strictly below `2^32`.
    type Output: ArrayLength + PowerOfTwo + NonZero + IsLess<U4294967296, Output = B1>;

    /// The minimum number of blocks required for a given Cost Factor (log2(N))
    ///
    /// For the implementations in this file this is `N + 2`; it must not exceed `2^32`.
    type MinimumBlocks: ArrayLength + NonZero + IsLessOrEqual<U4294967296, Output = B1>;
}
148
// Largest cost factor (log2(N)) for which `ValidCostFactor` is implemented in this file (`U31`).
const MAX_CF: u8 = 31;
// The `N` value corresponding to `MAX_CF`, i.e. 2^31.
const MAX_N: u32 = 1 << MAX_CF;
151
152macro_rules! impl_valid_cost_factor {
153    ($($base:ty),*) => {
154        $(
155            impl sealing::Sealed for $base {}
156            impl ValidCostFactor for $base {
157                type Output = <U1 as core::ops::Shl<$base>>::Output;
158                type MinimumBlocks = <<U1 as core::ops::Shl<$base>>::Output as core::ops::Add<U2>>::Output;
159            }
160        )*
161    };
162}
163
164impl_valid_cost_factor!(
165    U1, U2, U3, U4, U5, U6, U7, U8, U9, U10, U11, U12, U13, U14, U15, U16, U17, U18, U19, U20, U21,
166    U22, U23, U24, U25, U26, U27, U28, U29, U30, U31
167);
168
/// Generalized RoMix interface with a runtime R value
///
/// In the size requirements below, `n` stands for `2^cf` and `r` for the block-size
/// parameter passed to each method. The `*_ex` methods take the Salsa20 engine as a type
/// parameter; the methods without the suffix forward to them with [`DefaultEngine1`]
/// (single-lane) or [`DefaultEngine2`] (two-lane).
///
/// The blanket implementation in this file panics when a buffer is shorter than required.
pub trait RoMix {
    /// Perform the front part of the $RoMix$ operation
    ///
    /// Buffer must be at least 128 * r * (n + 1) bytes long.
    fn ro_mix_front_ex<S: Salsa20<Lanes = U1>>(&mut self, r: NonZeroU32, cf: NonZeroU8);
    /// Perform the back part of the $RoMix$ operation
    ///
    /// Buffer must be at least 128 * r * (n + 2) bytes long.
    ///
    /// Return: the raw salt output for the completed $RoMix$ operation
    /// (128 * r bytes in the blanket implementation)
    fn ro_mix_back_ex<S: Salsa20<Lanes = U1>>(&mut self, r: NonZeroU32, cf: NonZeroU8) -> &[u8];
    /// Interleave the front and back parts of the $RoMix$ operation in two independent buffers
    ///
    /// In the blanket implementation `self` receives the back part and `front` receives the
    /// front part; both run in the same two-lane kernel invocations.
    ///
    /// Buffer must be at least 128 * r * (n + 2) bytes long. This applies to both buffers.
    ///
    /// Return: the raw salt output for the completed $RoMix$ operation, borrowed from `self`
    fn ro_mix_interleaved_ex<'a, S: Salsa20<Lanes = U2>>(
        &'a mut self,
        front: &mut Self,
        r: NonZeroU32,
        cf: NonZeroU8,
    ) -> &'a [u8];

    /// Convenience method to get the input buffer for the $RoMix$ operation
    ///
    /// Always returns the first 128 * r bytes of the buffer
    fn ro_mix_input_buffer(&mut self, r: NonZeroU32) -> &mut [u8];

    /// Perform the front part of the $RoMix$ operation
    ///
    /// Buffer must be at least 128 * r * (n + 1) bytes long.
    ///
    /// Uses [`DefaultEngine1`].
    fn ro_mix_front(&mut self, r: NonZeroU32, cf: NonZeroU8) {
        self.ro_mix_front_ex::<DefaultEngine1>(r, cf);
    }
    /// Perform the back part of the $RoMix$ operation
    ///
    /// Buffer must be at least 128 * r * (n + 2) bytes long.
    ///
    /// Uses [`DefaultEngine1`].
    ///
    /// Return: the raw salt output for the completed $RoMix$ operation
    fn ro_mix_back(&mut self, r: NonZeroU32, cf: NonZeroU8) -> &[u8] {
        self.ro_mix_back_ex::<DefaultEngine1>(r, cf)
    }
    /// Interleave the front and back parts of the $RoMix$ operation in two independent buffers
    ///
    /// Buffer must be at least 128 * r * (n + 2) bytes long.
    ///
    /// Uses [`DefaultEngine2`].
    ///
    /// Return: the raw salt output for the completed $RoMix$ operation
    fn ro_mix_interleaved(&mut self, front: &mut Self, r: NonZeroU32, cf: NonZeroU8) -> &[u8] {
        self.ro_mix_interleaved_ex::<DefaultEngine2>(front, r, cf)
    }
}
221
222#[cfg_attr(
223    all(target_arch = "x86_64", not(target_feature = "avx2")),
224    scrypt_opt_derive::generate_target_variant("avx2")
225)]
226#[cfg_attr(
227    not(all(target_arch = "x86_64", not(target_feature = "avx2"))),
228    inline(always)
229)]
230fn ro_mix_front_ex_dyn<S: Salsa20<Lanes = U1>>(
231    v: &mut [Align64<fixed_r::Block<U1>>],
232    r: NonZeroU32,
233    cf: NonZeroU8,
234) {
235    let r = r.get() as usize;
236    let n = 1 << cf.get();
237    assert!(
238        v.len() >= r * (n + 1),
239        "ro_mix_front_ex: v.len() < r * (n + 1)"
240    );
241
242    // SAFETY: n is at least 1, v is at least r long
243    unsafe {
244        v.get_unchecked_mut(..r).iter_mut().for_each(|chunk| {
245            S::shuffle_in(
246                chunk
247                    .as_mut_ptr()
248                    .cast::<Align64<[u32; 16]>>()
249                    .as_mut()
250                    .unwrap(),
251            );
252            S::shuffle_in(
253                chunk
254                    .as_mut_ptr()
255                    .cast::<Align64<[u32; 16]>>()
256                    .add(1)
257                    .as_mut()
258                    .unwrap(),
259            );
260        });
261    }
262
263    for i in 0..n {
264        let [src, dst] = unsafe {
265            v.get_disjoint_unchecked_mut([(i * r)..((i + 1) * r), ((i + 1) * r)..((i + 2) * r)])
266        };
267        block_mix_dyn!(r; [<S> &*src => &mut *dst]);
268    }
269}
270
271#[cfg_attr(
272    all(target_arch = "x86_64", not(target_feature = "avx2")),
273    scrypt_opt_derive::generate_target_variant("avx2")
274)]
275#[cfg_attr(
276    not(all(target_arch = "x86_64", not(target_feature = "avx2"))),
277    inline(always)
278)]
279fn ro_mix_back_ex_dyn<S: Salsa20<Lanes = U1>>(
280    v: &mut [Align64<fixed_r::Block<U1>>],
281    r: NonZeroU32,
282    cf: NonZeroU8,
283) -> &[u8] {
284    let r = r.get() as usize;
285    let n = 1 << cf.get();
286    assert!(
287        v.len() >= r * (n + 2),
288        "pipeline_end_ex: v.len() < r * (n + 2)"
289    );
290
291    for _ in (0..n).step_by(2) {
292        let idx = unsafe {
293            v.as_ptr()
294                .add((n * r) as usize)
295                .cast::<u32>()
296                .add(r * 32 - 16)
297                .read()
298        } as usize;
299
300        let j = idx & (n - 1);
301
302        // SAFETY: the largest j value is n-1, so the largest index of the 3 is n+1, which is in bounds after the >=n+2 check
303        let [in0, in1, out] = unsafe {
304            v.get_disjoint_unchecked_mut([
305                (n * r)..((n + 1) * r),
306                (j * r)..((j + 1) * r),
307                ((n + 1) * r)..((n + 2) * r),
308            ])
309        };
310        block_mix_dyn!(r; [<S> &(&*in0, &*in1) => &mut *out]);
311        let idx2 = unsafe {
312            v.as_ptr()
313                .add(((n + 1) * r) as usize)
314                .cast::<u32>()
315                .add(r * 32 - 16)
316                .read()
317        } as usize;
318
319        let j2 = idx2 & (n - 1);
320
321        // SAFETY: the largest j2 value is n-1, so the largest index of the 3 is n+1, which is in bounds after the >=n+2 check
322        let [b, v, t] = unsafe {
323            v.get_disjoint_unchecked_mut([
324                (n * r)..((n + 1) * r),
325                (j2 * r)..((j2 + 1) * r),
326                ((n + 1) * r)..((n + 2) * r),
327            ])
328        };
329        block_mix_dyn!(r; [<S> &(&*v, &*t) => &mut *b]);
330    }
331
332    // SAFETY: n is at least 1, v is at least r * (n + 2) long
333    unsafe {
334        v.get_unchecked_mut(r * n..r * (n + 1))
335            .iter_mut()
336            .for_each(|chunk| {
337                S::shuffle_out(
338                    chunk
339                        .as_mut_ptr()
340                        .cast::<Align64<[u32; 16]>>()
341                        .as_mut()
342                        .unwrap(),
343                );
344                S::shuffle_out(
345                    chunk
346                        .as_mut_ptr()
347                        .cast::<Align64<[u32; 16]>>()
348                        .add(1)
349                        .as_mut()
350                        .unwrap(),
351                );
352            });
353
354        core::slice::from_raw_parts(v.as_ptr().add(r * n).cast::<u8>(), 128 * r)
355    }
356}
357
358#[cfg_attr(
359    all(target_arch = "x86_64", not(target_feature = "avx2")),
360    scrypt_opt_derive::generate_target_variant("avx2")
361)]
362#[cfg_attr(
363    not(all(target_arch = "x86_64", not(target_feature = "avx2"))),
364    inline(always)
365)]
366fn ro_mix_interleaved_ex_dyn<'a, S: Salsa20<Lanes = U2>>(
367    self_v: &mut [Align64<fixed_r::Block<U1>>],
368    other_v: &mut [Align64<fixed_r::Block<U1>>],
369    r: NonZeroU32,
370    cf: NonZeroU8,
371) -> &'a [u8] {
372    let r = r.get() as usize;
373    let n = 1 << cf.get();
374
375    assert!(
376        other_v.len() >= r * (n + 2),
377        "ro_mix_interleaved_ex: other_v.len() < r * (n + 2)"
378    );
379    assert!(
380        self_v.len() >= r * (n + 2),
381        "ro_mix_interleaved_ex: self_v.len() < r * (n + 2)"
382    );
383
384    // SAFETY: other_v is always 64-byte aligned
385    // SAFETY: other_v is at least r long
386    unsafe {
387        other_v.get_unchecked_mut(..r).iter_mut().for_each(|chunk| {
388            S::shuffle_in(
389                chunk
390                    .as_mut_ptr()
391                    .cast::<Align64<[u32; 16]>>()
392                    .as_mut()
393                    .unwrap(),
394            );
395            S::shuffle_in(
396                chunk
397                    .as_mut_ptr()
398                    .cast::<Align64<[u32; 16]>>()
399                    .add(1)
400                    .as_mut()
401                    .unwrap(),
402            );
403        });
404    }
405
406    for i in (0..n).step_by(2) {
407        // SAFETY: the largest i value is n-1, so the largest index is n+1, which is in bounds after the >=n+2 check
408        let [src, middle, dst] = unsafe {
409            other_v.get_disjoint_unchecked_mut([
410                (i * r)..((i + 1) * r),
411                ((i + 1) * r)..((i + 2) * r),
412                ((i + 2) * r)..((i + 3) * r),
413            ])
414        };
415
416        {
417            // Self: Compute T <- BlockMix(B ^ V[j])
418            // Other: Compute V[i+1] <- BlockMix(V[i])
419            let idx = unsafe {
420                self_v
421                    .as_ptr()
422                    .add((n * r) as usize)
423                    .cast::<u32>()
424                    .add(r * 32 - 16)
425                    .read()
426            } as usize;
427
428            let j = idx & (n - 1);
429
430            let [in0, in1, out] = unsafe {
431                self_v.get_disjoint_unchecked_mut([
432                    (j * r)..((j + 1) * r),
433                    (n * r)..((n + 1) * r),
434                    ((n + 1) * r)..((n + 2) * r),
435                ])
436            };
437
438            block_mix_dyn!(r; [<S> &&*src => &mut *middle, <S> &(&*in0, &*in1) => &mut *out]);
439        }
440
441        {
442            // Self: Compute B <- BlockMix(T ^ V[j'])
443            // Other: Compute V[i+2] <- BlockMix(V[i+1]) on last iteration it "naturally overflows" to V[n], so let B = V[n]
444            let idx2 = unsafe {
445                self_v
446                    .as_ptr()
447                    .add(((n + 1) * r) as usize)
448                    .cast::<u32>()
449                    .add(r * 32 - 16)
450                    .read()
451            } as usize;
452
453            let j2 = idx2 & (n - 1);
454            let [self_b, self_v, self_t] = unsafe {
455                self_v.get_disjoint_unchecked_mut([
456                    (n * r)..((n + 1) * r),
457                    (j2 * r)..((j2 + 1) * r),
458                    ((n + 1) * r)..((n + 2) * r),
459                ])
460            };
461
462            block_mix_dyn!(r; [<S> &*middle => &mut *dst, <S> &(&*self_v, &*self_t) => &mut *self_b]);
463        }
464    }
465    // SAFETY: n is at least 1, self_v is at least r * (n + 2) long
466    unsafe {
467        self_v
468            .get_unchecked_mut(r * n..r * (n + 1))
469            .iter_mut()
470            .for_each(|chunk| {
471                S::shuffle_out(
472                    chunk
473                        .as_mut_ptr()
474                        .cast::<Align64<[u32; 16]>>()
475                        .as_mut()
476                        .unwrap(),
477                );
478                S::shuffle_out(
479                    chunk
480                        .as_mut_ptr()
481                        .cast::<Align64<[u32; 16]>>()
482                        .add(1)
483                        .as_mut()
484                        .unwrap(),
485                );
486            });
487
488        core::slice::from_raw_parts(self_v.as_ptr().add(r * n).cast::<u8>(), 128 * r)
489    }
490}
491
492impl<Q: AsRef<[Align64<fixed_r::Block<U1>>]> + AsMut<[Align64<fixed_r::Block<U1>>]>> RoMix for Q {
493    fn ro_mix_input_buffer(&mut self, r: NonZeroU32) -> &mut [u8] {
494        let r = r.get() as usize;
495        let v = self.as_mut();
496        assert!(v.len() >= r, "ro_mix_input_buffer: v.len() <  r");
497        unsafe { core::slice::from_raw_parts_mut(v.as_mut_ptr().cast::<u8>(), 128 * r) }
498    }
499
500    fn ro_mix_front_ex<S: Salsa20<Lanes = U1>>(&mut self, r: NonZeroU32, cf: NonZeroU8) {
501        let v = self.as_mut();
502
503        #[cfg(all(target_arch = "x86_64", not(target_feature = "avx2")))]
504        {
505            if features::Avx2.check() {
506                unsafe { ro_mix_front_ex_dyn_avx2::<salsa20::x86_64::BlockSse2<U1>>(v, r, cf) }
507                return;
508            }
509        }
510
511        ro_mix_front_ex_dyn::<S>(v, r, cf)
512    }
513
514    fn ro_mix_back_ex<S: Salsa20<Lanes = U1>>(&mut self, r: NonZeroU32, cf: NonZeroU8) -> &[u8] {
515        let v = self.as_mut();
516
517        #[cfg(all(target_arch = "x86_64", not(target_feature = "avx2")))]
518        {
519            if features::Avx2.check() {
520                return unsafe {
521                    ro_mix_back_ex_dyn_avx2::<salsa20::x86_64::BlockSse2<U1>>(v, r, cf)
522                };
523            }
524        }
525
526        ro_mix_back_ex_dyn::<S>(v, r, cf)
527    }
528
529    fn ro_mix_interleaved_ex<'a, S: Salsa20<Lanes = U2>>(
530        &'a mut self,
531        front: &mut Self,
532        r: NonZeroU32,
533        cf: NonZeroU8,
534    ) -> &'a [u8] {
535        let self_v = self.as_mut();
536        let other_v = front.as_mut();
537
538        #[cfg(all(target_arch = "x86_64", not(target_feature = "avx2")))]
539        {
540            if features::Avx2.check() {
541                return unsafe {
542                    ro_mix_interleaved_ex_dyn_avx2::<salsa20::x86_64::BlockAvx2Mb2>(
543                        self_v, other_v, r, cf,
544                    )
545                };
546            }
547        }
548
549        ro_mix_interleaved_ex_dyn::<S>(self_v, other_v, r, cf)
550    }
551}
552
/// Trait for loading a block from a buffer
///
/// Implemented in this file for slices of aligned blocks and for pairs of inputs, where a
/// pair yields the XOR of what its two members load.
pub trait ScryptBlockMixInput<'a, B: BlockType> {
    /// Load a block from the buffer
    ///
    /// `word_idx` counts 64-byte units from the start of the buffer in the slice
    /// implementation in this file.
    ///
    /// # Safety
    ///
    /// The slice implementation performs no bounds check, so the caller must ensure the
    /// 64-byte unit at `word_idx` lies inside the underlying buffer. Any further requirements
    /// of `B::read_from_ptr` also apply — NOTE(review): confirm against `BlockType`.
    unsafe fn load(&self, word_idx: usize) -> B;
}
558
559impl<'a, B: BlockType> ScryptBlockMixInput<'a, B> for &'a [Align64<fixed_r::Block<U1>>] {
560    #[inline(always)]
561    unsafe fn load(&self, word_idx: usize) -> B {
562        unsafe { B::read_from_ptr(self.as_ptr().cast::<[u8; 64]>().add(word_idx).cast()) }
563    }
564}
565
566impl<'a, B: BlockType, Lhs: ScryptBlockMixInput<'a, B>, Rhs: ScryptBlockMixInput<'a, B>>
567    ScryptBlockMixInput<'a, B> for (Lhs, Rhs)
568{
569    #[inline(always)]
570    unsafe fn load(&self, word_idx: usize) -> B {
571        let mut x0 = unsafe { self.0.load(word_idx) };
572        let x1 = unsafe { self.1.load(word_idx) };
573        x0.xor_with(x1);
574        x0
575    }
576}
577
/// Trait for storing a block to a buffer
///
/// Stores are split into even-numbered and odd-numbered words so that an implementation can
/// place the two kinds differently. The pair implementation in this file forwards every
/// store to both of its members.
pub trait ScryptBlockMixOutput<'a, R: ArrayLength, B: BlockType> {
    /// Store even-numbered words
    fn store_even(&mut self, word_idx: usize, value: B);
    /// Store odd-numbered words
    fn store_odd(&mut self, word_idx: usize, value: B);
}
585
586impl<
587    'a,
588    R: ArrayLength,
589    B: BlockType,
590    U: ScryptBlockMixOutput<'a, R, B>,
591    V: ScryptBlockMixOutput<'a, R, B>,
592> ScryptBlockMixOutput<'a, R, B> for (U, V)
593{
594    #[inline(always)]
595    fn store_even(&mut self, word_idx: usize, value: B) {
596        self.0.store_even(word_idx, value);
597        self.1.store_even(word_idx, value);
598    }
599    #[inline(always)]
600    fn store_odd(&mut self, word_idx: usize, value: B) {
601        self.0.store_odd(word_idx, value);
602        self.1.store_odd(word_idx, value);
603    }
604}
605
606#[cfg(test)]
607mod tests {
608    use generic_array::typenum::{U1, U2, U4, U8, U16};
609
610    use super::*;
611    use crate::{
612        fixed_r::{Block, BufferSet},
613        pbkdf2_1::Pbkdf2HmacSha256State,
614        pipeline::PipelineContext,
615    };
616
617    #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
618    fn test_ro_mix_cas_zmm<R: ArrayLength + NonZero>() {
619        const CF: u8 = 8;
620
621        let password = b"password";
622        let hmac = Pbkdf2HmacSha256State::new(password);
623        let salt = b"salt";
624        let mut expected = [0u8; 64];
625
626        let params = scrypt::Params::new(CF, R::U32, 1, 64).unwrap();
627
628        scrypt::scrypt(password, salt, &params, &mut expected).expect("scrypt failed");
629
630        let mut buffers = BufferSet::<_, R>::new_boxed(CF.try_into().unwrap());
631        buffers.set_input(&hmac, salt);
632
633        buffers.scrypt_ro_mix_ex_zmm::<salsa20::x86_64::BlockAvx512F>();
634
635        let mut output = [0u8; 64];
636
637        buffers.extract_output(&hmac, &mut output);
638
639        assert_eq!(output, expected);
640    }
641
642    #[test]
643    fn test_pipeline() {
644        for cf in 1..=8 {
645            let mut buffers0 = BufferSet::<_, U1>::new_boxed(cf.try_into().unwrap());
646            let mut buffers1 = BufferSet::<_, U1>::new_boxed(cf.try_into().unwrap());
647
648            let input_passwords = [
649                b"password0".as_slice(),
650                b"password1".as_slice(),
651                b"password2".as_slice(),
652                b"password3".as_slice(),
653                b"password4".as_slice(),
654                b"password5".as_slice(),
655                b"password6".as_slice(),
656                b"password7".as_slice(),
657                b"password8".as_slice(),
658                b"password9".as_slice(),
659                b"password10".as_slice(),
660            ];
661
662            let input_salts = [
663                b"salt0".as_slice(),
664                b"salt1".as_slice(),
665                b"salt2".as_slice(),
666                b"salt3".as_slice(),
667                b"salt4".as_slice(),
668                b"salt5".as_slice(),
669                b"salt6".as_slice(),
670                b"salt7".as_slice(),
671                b"salt8".as_slice(),
672                b"salt9".as_slice(),
673                b"salt10".as_slice(),
674            ];
675
676            struct Context<'a> {
677                params: scrypt::Params,
678                i: usize,
679                total: usize,
680                password: &'a [u8],
681                salt: &'a [u8],
682            }
683
684            impl<'a, R: ArrayLength + NonZero> PipelineContext<usize, Vec<Align64<Block<R>>>, R, ()>
685                for Context<'a>
686            {
687                fn begin(
688                    &mut self,
689                    _ratchet: &mut usize,
690                    buffer_set: &mut BufferSet<Vec<Align64<Block<R>>>, R>,
691                ) {
692                    buffer_set.set_input(&Pbkdf2HmacSha256State::new(self.password), self.salt);
693                }
694
695                fn drain(
696                    self,
697                    ratchet: &mut usize,
698                    buffer_set: &mut BufferSet<Vec<Align64<Block<R>>>, R>,
699                ) -> Option<()> {
700                    assert_eq!(*ratchet, self.i, "output should be in order");
701                    assert!(*ratchet < self.total, "should have processed all passwords");
702                    *ratchet += 1;
703                    let mut output = [0u8; 64];
704                    buffer_set
705                        .extract_output(&Pbkdf2HmacSha256State::new(self.password), &mut output);
706                    let mut expected = [0u8; 64];
707
708                    scrypt::scrypt(self.password, self.salt, &self.params, &mut expected)
709                        .expect("scrypt failed");
710
711                    assert_eq!(output, expected, "unexpected output at round {}", self.i);
712
713                    if *ratchet == self.total {
714                        Some(())
715                    } else {
716                        None
717                    }
718                }
719            }
720
721            let params = scrypt::Params::new(cf, U1::U32, 1, 64).unwrap();
722
723            // test all possible input counts
724            for test_len in 0..input_passwords.len() {
725                let mut ratchet = 0;
726                let ret = buffers0.pipeline(
727                    &mut buffers1,
728                    input_passwords
729                        .iter()
730                        .zip(input_salts.iter())
731                        .enumerate()
732                        .map(|(i, (p, s))| Context {
733                            params,
734                            i,
735                            total: test_len,
736                            password: p,
737                            salt: s,
738                        })
739                        .take(test_len),
740                    &mut ratchet,
741                );
742
743                assert_eq!(
744                    ret.is_some(),
745                    test_len > 0,
746                    "should have processed all passwords"
747                );
748            }
749        }
750    }
751
752    fn test_ro_mix_cas<R: ArrayLength + NonZero>() {
753        const CF: u8 = 8;
754
755        let password = b"password";
756        let salt = b"salt";
757        let mut expected = [0u8; 64];
758
759        let params = scrypt::Params::new(CF, R::U32, 1, 64).unwrap();
760
761        scrypt::scrypt(password, salt, &params, &mut expected).expect("scrypt failed");
762
763        let mut buffers = BufferSet::<_, R>::new_boxed(CF.try_into().unwrap());
764
765        assert_eq!(buffers.n(), 1 << CF);
766
767        buffers.set_input(&Pbkdf2HmacSha256State::new(password), salt);
768
769        buffers.scrypt_ro_mix();
770
771        let mut output = [0u8; 64];
772
773        buffers.extract_output(&Pbkdf2HmacSha256State::new(password), &mut output);
774
775        assert_eq!(output, expected);
776    }
777
778    fn test_ro_mix_cas_ex<R: ArrayLength + NonZero, S: Salsa20<Lanes = U1>>() {
779        const CF: u8 = 8;
780
781        let password = b"password";
782        let salt = b"salt";
783        let mut expected = [0u8; 64];
784
785        let params = scrypt::Params::new(CF, R::U32, 1, 64).unwrap();
786
787        scrypt::scrypt(password, salt, &params, &mut expected).expect("scrypt failed");
788
789        let mut buffers = BufferSet::<_, R>::new_boxed(CF.try_into().unwrap());
790
791        let mut buffer_dyn = vec![Default::default(); R::USIZE * ((1 << CF) + 2)];
792
793        assert_eq!(buffers.n(), 1 << CF);
794
795        buffers.set_input(&Pbkdf2HmacSha256State::new(password), salt);
796        buffer_dyn
797            .ro_mix_input_buffer(R::U32.try_into().unwrap())
798            .copy_from_slice(buffers.input_buffer().as_slice());
799
800        buffer_dyn.ro_mix_front_ex::<S>(R::U32.try_into().unwrap(), CF.try_into().unwrap());
801        buffers.ro_mix_front_ex::<S>();
802        let dyn_output =
803            buffer_dyn.ro_mix_back_ex::<S>(R::U32.try_into().unwrap(), CF.try_into().unwrap());
804        buffers.ro_mix_back_ex::<S>();
805
806        assert_eq!(dyn_output, buffers.raw_salt_output().as_slice());
807
808        let mut output = [0u8; 64];
809
810        buffers.extract_output(&Pbkdf2HmacSha256State::new(password), &mut output);
811
812        assert_eq!(output, expected);
813    }
814
815    fn test_ro_mix_cas_interleaved<R: ArrayLength + NonZero>() {
816        const CF: u8 = 8;
817
818        let passwords = [
819            b"password0".as_slice(),
820            b"password1".as_slice(),
821            b"password2".as_slice(),
822            b"password3".as_slice(),
823            b"password4".as_slice(),
824            b"password5".as_slice(),
825            b"password6".as_slice(),
826            b"password7".as_slice(),
827            b"password8".as_slice(),
828            b"password9".as_slice(),
829            b"password10".as_slice(),
830            b"password11".as_slice(),
831            b"password12".as_slice(),
832            b"password13".as_slice(),
833            b"password14".as_slice(),
834            b"password15".as_slice(),
835        ];
836
837        let mut expected = [[0u8; 64]; 16];
838
839        for (i, password) in passwords.iter().enumerate() {
840            let params = scrypt::Params::new(CF, R::U32, 1, 64).unwrap();
841            scrypt::scrypt(password, b"salt", &params, &mut expected[i]).expect("scrypt failed");
842        }
843
844        let mut buffers0 = BufferSet::<_, R>::new_boxed(CF.try_into().unwrap());
845        let mut buffers1 = BufferSet::<_, R>::new_boxed(CF.try_into().unwrap());
846
847        let mut output = [0u8; 64];
848        buffers0.set_input(&Pbkdf2HmacSha256State::new(passwords[0]), b"salt");
849        buffers1.set_input(&Pbkdf2HmacSha256State::new(passwords[1]), b"salt");
850        buffers0.ro_mix_front();
851        for i in 2..16 {
852            buffers0.ro_mix_interleaved(&mut buffers1);
853            buffers0.extract_output(&Pbkdf2HmacSha256State::new(passwords[i - 2]), &mut output);
854            assert_eq!(output, expected[i - 2], "error at round {}", i);
855            core::hint::black_box(&mut buffers0);
856            (buffers0, buffers1) = (buffers1, buffers0);
857            buffers1.set_input(&Pbkdf2HmacSha256State::new(passwords[i]), b"salt");
858        }
859        buffers0.ro_mix_back();
860        buffers1.scrypt_ro_mix();
861        buffers0.extract_output(&Pbkdf2HmacSha256State::new(passwords[14]), &mut output);
862        assert_eq!(output, expected[14]);
863        buffers1.extract_output(&Pbkdf2HmacSha256State::new(passwords[15]), &mut output);
864        assert_eq!(output, expected[15]);
865    }
866
867    fn test_ro_mix_cas_interleaved_ex<
868        R: ArrayLength + NonZero,
869        S1: Salsa20<Lanes = U1>,
870        S2: Salsa20<Lanes = U2>,
871    >() {
872        const CF: u8 = 8;
873
874        let passwords = [
875            b"password0".as_slice(),
876            b"password1".as_slice(),
877            b"password2".as_slice(),
878            b"password3".as_slice(),
879            b"password4".as_slice(),
880            b"password5".as_slice(),
881            b"password6".as_slice(),
882            b"password7".as_slice(),
883            b"password8".as_slice(),
884            b"password9".as_slice(),
885            b"password10".as_slice(),
886            b"password11".as_slice(),
887            b"password12".as_slice(),
888            b"password13".as_slice(),
889            b"password14".as_slice(),
890            b"password15".as_slice(),
891        ];
892
893        let mut expected = [[0u8; 64]; 16];
894
895        for (i, password) in passwords.iter().enumerate() {
896            let params = scrypt::Params::new(CF, R::U32, 1, 64).unwrap();
897            scrypt::scrypt(password, b"salt", &params, &mut expected[i]).expect("scrypt failed");
898        }
899
900        let mut buffers0 = BufferSet::<_, R>::new_boxed(CF.try_into().unwrap());
901        let mut buffers1 = BufferSet::<_, R>::new_boxed(CF.try_into().unwrap());
902        let mut buffers0_dyn = vec![Default::default(); R::USIZE * ((1 << CF) + 2)];
903        let mut buffers1_dyn = vec![Default::default(); R::USIZE * ((1 << CF) + 2)];
904
905        let mut output = [0u8; 64];
906        buffers0.set_input(&Pbkdf2HmacSha256State::new(passwords[0]), b"salt");
907        buffers1.set_input(&Pbkdf2HmacSha256State::new(passwords[1]), b"salt");
908        buffers0_dyn
909            .ro_mix_input_buffer(R::U32.try_into().unwrap())
910            .copy_from_slice(buffers0.input_buffer().as_slice());
911        buffers1_dyn
912            .ro_mix_input_buffer(R::U32.try_into().unwrap())
913            .copy_from_slice(buffers1.input_buffer().as_slice());
914
915        buffers0.ro_mix_front_ex::<S1>();
916        buffers0_dyn.ro_mix_front_ex::<S1>(R::U32.try_into().unwrap(), CF.try_into().unwrap());
917        for i in 2..16 {
918            buffers0.ro_mix_interleaved_ex::<S2>(&mut buffers1);
919            let dyn_salt_output = buffers0_dyn.ro_mix_interleaved_ex::<S2>(
920                &mut buffers1_dyn,
921                R::U32.try_into().unwrap(),
922                CF.try_into().unwrap(),
923            );
924            buffers0.extract_output(&Pbkdf2HmacSha256State::new(passwords[i - 2]), &mut output);
925            assert_eq!(dyn_salt_output, buffers0.raw_salt_output().as_slice());
926
927            assert_eq!(output, expected[i - 2], "error at round {}", i);
928            core::hint::black_box(&mut buffers0);
929            (buffers0, buffers1) = (buffers1, buffers0);
930            (buffers0_dyn, buffers1_dyn) = (buffers1_dyn, buffers0_dyn);
931            buffers1.set_input(&Pbkdf2HmacSha256State::new(passwords[i]), b"salt");
932            buffers1_dyn
933                .ro_mix_input_buffer(R::U32.try_into().unwrap())
934                .copy_from_slice(buffers1.input_buffer().as_slice());
935        }
936        buffers0.ro_mix_back_ex::<S1>();
937        let dyn_salt_output =
938            buffers0_dyn.ro_mix_back_ex::<S1>(R::U32.try_into().unwrap(), CF.try_into().unwrap());
939        assert_eq!(dyn_salt_output, buffers0.raw_salt_output().as_slice());
940
941        buffers1.scrypt_ro_mix();
942        buffers1_dyn.ro_mix_front_ex::<S1>(R::U32.try_into().unwrap(), CF.try_into().unwrap());
943        let dyn_salt_output =
944            buffers1_dyn.ro_mix_back_ex::<S1>(R::U32.try_into().unwrap(), CF.try_into().unwrap());
945        assert_eq!(dyn_salt_output, buffers1.raw_salt_output().as_slice());
946
947        buffers0.extract_output(&Pbkdf2HmacSha256State::new(passwords[14]), &mut output);
948        assert_eq!(output, expected[14]);
949        buffers1.extract_output(&Pbkdf2HmacSha256State::new(passwords[15]), &mut output);
950        assert_eq!(output, expected[15]);
951    }
952
953    #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
954    fn test_ro_mix_cas_interleaved_zmm<R: ArrayLength + NonZero>() {
955        const CF: u8 = 8;
956
957        let passwords = [
958            b"password0".as_slice(),
959            b"password1".as_slice(),
960            b"password2".as_slice(),
961            b"password3".as_slice(),
962            b"password4".as_slice(),
963            b"password5".as_slice(),
964            b"password6".as_slice(),
965            b"password7".as_slice(),
966            b"password8".as_slice(),
967            b"password9".as_slice(),
968            b"password10".as_slice(),
969            b"password11".as_slice(),
970            b"password12".as_slice(),
971            b"password13".as_slice(),
972            b"password14".as_slice(),
973            b"password15".as_slice(),
974        ];
975
976        let hmacs: [Pbkdf2HmacSha256State; 16] =
977            core::array::from_fn(|i| Pbkdf2HmacSha256State::new(passwords[i]));
978
979        let mut expected = [[0u8; 64]; 16];
980
981        for (i, password) in passwords.iter().enumerate() {
982            let params = scrypt::Params::new(CF, R::U32, 1, 64).unwrap();
983            scrypt::scrypt(password, b"salt", &params, &mut expected[i]).expect("scrypt failed");
984        }
985
986        let mut buffers0 = BufferSet::<_, R>::new_boxed(CF.try_into().unwrap());
987        let mut buffers1 = BufferSet::<_, R>::new_boxed(CF.try_into().unwrap());
988
989        let mut output = [0u8; 64];
990        buffers0.set_input(&hmacs[0], b"salt");
991        buffers1.set_input(&hmacs[1], b"salt");
992        buffers0.ro_mix_front();
993        for i in 2..16 {
994            buffers0.ro_mix_interleaved_ex_zmm::<salsa20::x86_64::BlockAvx512FMb2>(&mut buffers1);
995            buffers0.extract_output(&hmacs[i - 2], &mut output);
996            assert_eq!(output, expected[i - 2], "error at round {}", i);
997            core::hint::black_box(&mut buffers0);
998            (buffers0, buffers1) = (buffers1, buffers0);
999            buffers1.set_input(&hmacs[i], b"salt");
1000        }
1001        buffers0.ro_mix_back();
1002        buffers1.scrypt_ro_mix();
1003        buffers0.extract_output(&hmacs[14], &mut output);
1004        assert_eq!(output, expected[14]);
1005        buffers1.extract_output(&hmacs[15], &mut output);
1006        assert_eq!(output, expected[15]);
1007    }
1008
1009    macro_rules! write_test {
1010        ($name:ident, $test:ident, $($generic:ty),* $(,)?) => {
1011            #[test]
1012            fn $name() {
1013                $test::<$($generic),*>();
1014            }
1015        };
1016    }
1017
    // Tests for the default (publicly visible) implementation
1019    write_test!(test_ro_mix_cas_1, test_ro_mix_cas, U1);
1020    write_test!(test_ro_mix_cas_2, test_ro_mix_cas, U2);
1021    write_test!(test_ro_mix_cas_4, test_ro_mix_cas, U4);
1022    write_test!(test_ro_mix_cas_8, test_ro_mix_cas, U8);
1023    write_test!(test_ro_mix_cas_16, test_ro_mix_cas, U16);
1024
1025    write_test!(
1026        test_ro_mix_cas_interleaved_1,
1027        test_ro_mix_cas_interleaved,
1028        U1
1029    );
1030
1031    write_test!(
1032        test_ro_mix_cas_interleaved_2,
1033        test_ro_mix_cas_interleaved,
1034        U2
1035    );
1036
1037    write_test!(
1038        test_ro_mix_cas_interleaved_4,
1039        test_ro_mix_cas_interleaved,
1040        U4
1041    );
1042
1043    write_test!(
1044        test_ro_mix_cas_interleaved_8,
1045        test_ro_mix_cas_interleaved,
1046        U8
1047    );
1048
1049    write_test!(
1050        test_ro_mix_cas_interleaved_16,
1051        test_ro_mix_cas_interleaved,
1052        U16
1053    );
1054
1055    // AVX-2 versions
1056
1057    #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
1058    write_test!(
1059        test_ro_mix_cas_interleaved_1_avx2,
1060        test_ro_mix_cas_interleaved_ex,
1061        U1,
1062        salsa20::x86_64::BlockSse2<U1>,
1063        salsa20::x86_64::BlockAvx2Mb2
1064    );
1065
1066    #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
1067    write_test!(
1068        test_ro_mix_cas_interleaved_2_avx2,
1069        test_ro_mix_cas_interleaved_ex,
1070        U2,
1071        salsa20::x86_64::BlockSse2<U1>,
1072        salsa20::x86_64::BlockAvx2Mb2
1073    );
1074
1075    #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
1076    write_test!(
1077        test_ro_mix_cas_interleaved_4_avx2,
1078        test_ro_mix_cas_interleaved_ex,
1079        U4,
1080        salsa20::x86_64::BlockSse2<U1>,
1081        salsa20::x86_64::BlockAvx2Mb2
1082    );
1083
1084    #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
1085    write_test!(
1086        test_ro_mix_cas_interleaved_8_avx2,
1087        test_ro_mix_cas_interleaved_ex,
1088        U8,
1089        salsa20::x86_64::BlockSse2<U1>,
1090        salsa20::x86_64::BlockAvx2Mb2
1091    );
1092
1093    #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
1094    write_test!(
1095        test_ro_mix_cas_interleaved_16_avx2,
1096        test_ro_mix_cas_interleaved_ex,
1097        U16,
1098        salsa20::x86_64::BlockSse2<U1>,
1099        salsa20::x86_64::BlockAvx2Mb2
1100    );
1101
1102    #[cfg(target_arch = "x86_64")]
1103    write_test!(
1104        test_ro_mix_cas_1_sse2,
1105        test_ro_mix_cas_ex,
1106        U1,
1107        salsa20::x86_64::BlockSse2<U1>,
1108    );
1109    #[cfg(target_arch = "x86_64")]
1110    write_test!(
1111        test_ro_mix_cas_2_sse2,
1112        test_ro_mix_cas_ex,
1113        U2,
1114        salsa20::x86_64::BlockSse2<U1>,
1115    );
1116    #[cfg(target_arch = "x86_64")]
1117    write_test!(
1118        test_ro_mix_cas_4_sse2,
1119        test_ro_mix_cas_ex,
1120        U4,
1121        salsa20::x86_64::BlockSse2<U1>,
1122    );
1123    #[cfg(target_arch = "x86_64")]
1124    write_test!(
1125        test_ro_mix_cas_8_sse2,
1126        test_ro_mix_cas_ex,
1127        U8,
1128        salsa20::x86_64::BlockSse2<U1>,
1129    );
1130    #[cfg(target_arch = "x86_64")]
1131    write_test!(
1132        test_ro_mix_cas_16_sse2,
1133        test_ro_mix_cas_ex,
1134        U16,
1135        salsa20::x86_64::BlockSse2<U1>,
1136    );
1137
1138    // scalar versions
1139
1140    write_test!(
1141        test_ro_mix_cas_scalar_1,
1142        test_ro_mix_cas_ex,
1143        U1,
1144        salsa20::BlockScalar<U1>
1145    );
1146
1147    write_test!(
1148        test_ro_mix_cas_scalar_2,
1149        test_ro_mix_cas_ex,
1150        U2,
1151        salsa20::BlockScalar<U1>
1152    );
1153
1154    write_test!(
1155        test_ro_mix_cas_scalar_4,
1156        test_ro_mix_cas_ex,
1157        U4,
1158        salsa20::BlockScalar<U1>
1159    );
1160
1161    write_test!(
1162        test_ro_mix_cas_scalar_8,
1163        test_ro_mix_cas_ex,
1164        U8,
1165        salsa20::BlockScalar<U1>
1166    );
1167
1168    write_test!(
1169        test_ro_mix_cas_scalar_16,
1170        test_ro_mix_cas_ex,
1171        U16,
1172        salsa20::BlockScalar<U1>
1173    );
1174
1175    write_test!(
1176        test_ro_mix_cas_scalar_interleaved_1,
1177        test_ro_mix_cas_interleaved_ex,
1178        U1,
1179        salsa20::BlockScalar<U1>,
1180        salsa20::BlockScalar<U2>
1181    );
1182
1183    write_test!(
1184        test_ro_mix_cas_scalar_interleaved_2,
1185        test_ro_mix_cas_interleaved_ex,
1186        U2,
1187        salsa20::BlockScalar<U1>,
1188        salsa20::BlockScalar<U2>
1189    );
1190
1191    write_test!(
1192        test_ro_mix_cas_scalar_interleaved_4,
1193        test_ro_mix_cas_interleaved_ex,
1194        U4,
1195        salsa20::BlockScalar<U1>,
1196        salsa20::BlockScalar<U2>
1197    );
1198
1199    write_test!(
1200        test_ro_mix_cas_scalar_interleaved_8,
1201        test_ro_mix_cas_interleaved_ex,
1202        U8,
1203        salsa20::BlockScalar<U1>,
1204        salsa20::BlockScalar<U2>
1205    );
1206
1207    write_test!(
1208        test_ro_mix_cas_scalar_interleaved_16,
1209        test_ro_mix_cas_interleaved_ex,
1210        U16,
1211        salsa20::BlockScalar<U1>,
1212        salsa20::BlockScalar<U2>
1213    );
1214
1215    // AVX-512 versions
1216
1217    #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
1218    write_test!(
1219        test_ro_mix_cas_avx512f_1,
1220        test_ro_mix_cas_ex,
1221        U1,
1222        salsa20::x86_64::BlockAvx512F
1223    );
1224    #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
1225    write_test!(
1226        test_ro_mix_cas_avx512f_2,
1227        test_ro_mix_cas_ex,
1228        U2,
1229        salsa20::x86_64::BlockAvx512F
1230    );
1231    #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
1232    write_test!(
1233        test_ro_mix_cas_avx512f_4,
1234        test_ro_mix_cas_ex,
1235        U4,
1236        salsa20::x86_64::BlockAvx512F
1237    );
1238    #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
1239    write_test!(
1240        test_ro_mix_cas_avx512f_8,
1241        test_ro_mix_cas_ex,
1242        U8,
1243        salsa20::x86_64::BlockAvx512F
1244    );
1245
1246    #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
1247    write_test!(
1248        test_ro_mix_cas_avx512f_16,
1249        test_ro_mix_cas_ex,
1250        U16,
1251        salsa20::x86_64::BlockAvx512F
1252    );
1253    #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
1254    write_test!(
1255        test_ro_mix_cas_interleaved_avx512f_1,
1256        test_ro_mix_cas_interleaved_ex,
1257        U1,
1258        salsa20::x86_64::BlockAvx512F,
1259        salsa20::x86_64::BlockAvx512FMb2
1260    );
1261    #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
1262    write_test!(
1263        test_ro_mix_cas_interleaved_avx512f_2,
1264        test_ro_mix_cas_interleaved_ex,
1265        U2,
1266        salsa20::x86_64::BlockAvx512F,
1267        salsa20::x86_64::BlockAvx512FMb2
1268    );
1269    #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
1270    write_test!(
1271        test_ro_mix_cas_interleaved_avx512f_4,
1272        test_ro_mix_cas_interleaved_ex,
1273        U4,
1274        salsa20::x86_64::BlockAvx512F,
1275        salsa20::x86_64::BlockAvx512FMb2
1276    );
1277    #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
1278    write_test!(
1279        test_ro_mix_cas_interleaved_avx512f_8,
1280        test_ro_mix_cas_interleaved_ex,
1281        U8,
1282        salsa20::x86_64::BlockAvx512F,
1283        salsa20::x86_64::BlockAvx512FMb2
1284    );
1285
1286    // AVX-512 register resident versions
1287
1288    #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
1289    write_test!(test_ro_mix_cas_zmm_1, test_ro_mix_cas_zmm, U1);
1290    #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
1291    write_test!(test_ro_mix_cas_zmm_2, test_ro_mix_cas_zmm, U2);
1292    #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
1293    write_test!(test_ro_mix_cas_zmm_4, test_ro_mix_cas_zmm, U4);
1294    #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
1295    write_test!(test_ro_mix_cas_zmm_8, test_ro_mix_cas_zmm, U8);
1296
1297    #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
1298    write_test!(
1299        test_ro_mix_cas_interleaved_zmm_1,
1300        test_ro_mix_cas_interleaved_zmm,
1301        U1
1302    );
1303    #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
1304    write_test!(
1305        test_ro_mix_cas_interleaved_zmm_2,
1306        test_ro_mix_cas_interleaved_zmm,
1307        U2
1308    );
1309    #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
1310    write_test!(
1311        test_ro_mix_cas_interleaved_zmm_4,
1312        test_ro_mix_cas_interleaved_zmm,
1313        U4
1314    );
1315    #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
1316    write_test!(
1317        test_ro_mix_cas_interleaved_zmm_8,
1318        test_ro_mix_cas_interleaved_zmm,
1319        U8
1320    );
1321
1322    // portable SIMD versions
1323
1324    #[cfg(feature = "portable-simd")]
1325    write_test!(
1326        test_ro_mix_cas_portable_simd_1,
1327        test_ro_mix_cas_ex,
1328        U1,
1329        salsa20::BlockPortableSimd
1330    );
1331
1332    #[cfg(feature = "portable-simd")]
1333    write_test!(
1334        test_ro_mix_cas_portable_simd_2,
1335        test_ro_mix_cas_ex,
1336        U2,
1337        salsa20::BlockPortableSimd
1338    );
1339
1340    #[cfg(feature = "portable-simd")]
1341    write_test!(
1342        test_ro_mix_cas_portable_simd_4,
1343        test_ro_mix_cas_ex,
1344        U4,
1345        salsa20::BlockPortableSimd
1346    );
1347
1348    #[cfg(feature = "portable-simd")]
1349    write_test!(
1350        test_ro_mix_cas_portable_simd_8,
1351        test_ro_mix_cas_ex,
1352        U8,
1353        salsa20::BlockPortableSimd
1354    );
1355
1356    #[cfg(feature = "portable-simd")]
1357    write_test!(
1358        test_ro_mix_cas_portable_simd_16,
1359        test_ro_mix_cas_ex,
1360        U16,
1361        salsa20::BlockPortableSimd
1362    );
1363
1364    // portable SIMD interleaved versions
1365
1366    #[cfg(feature = "portable-simd")]
1367    write_test!(
1368        test_ro_mix_cas_portable_simd_interleaved_1,
1369        test_ro_mix_cas_interleaved_ex,
1370        U1,
1371        salsa20::BlockPortableSimd,
1372        salsa20::BlockPortableSimd2
1373    );
1374
1375    #[cfg(feature = "portable-simd")]
1376    write_test!(
1377        test_ro_mix_cas_portable_simd_interleaved_2,
1378        test_ro_mix_cas_interleaved_ex,
1379        U2,
1380        salsa20::BlockPortableSimd,
1381        salsa20::BlockPortableSimd2
1382    );
1383
1384    #[cfg(feature = "portable-simd")]
1385    write_test!(
1386        test_ro_mix_cas_portable_simd_interleaved_4,
1387        test_ro_mix_cas_interleaved_ex,
1388        U4,
1389        salsa20::BlockPortableSimd,
1390        salsa20::BlockPortableSimd2
1391    );
1392
1393    #[cfg(feature = "portable-simd")]
1394    write_test!(
1395        test_ro_mix_cas_portable_simd_interleaved_8,
1396        test_ro_mix_cas_interleaved_ex,
1397        U8,
1398        salsa20::BlockPortableSimd,
1399        salsa20::BlockPortableSimd2
1400    );
1401
1402    #[cfg(feature = "portable-simd")]
1403    write_test!(
1404        test_ro_mix_cas_portable_simd_interleaved_16,
1405        test_ro_mix_cas_interleaved_ex,
1406        U16,
1407        salsa20::BlockPortableSimd,
1408        salsa20::BlockPortableSimd2
1409    );
1410}