//! scrypt_opt/pipeline.rs

1use crate::{
2    Align64, RoMix,
3    fixed_r::{Block, BufferSet},
4    pbkdf2_1::{CreatePbkdf2HmacSha256State, Pbkdf2HmacSha256State},
5};
6use core::num::{NonZeroU8, NonZeroU32, NonZeroU64};
7use generic_array::{
8    ArrayLength,
9    typenum::{NonZero, U1},
10};
11
/// Comparison operator for the pipeline: equal
pub const CMP_EQ: u32 = 0x00000001;
/// Comparison operator for the pipeline: less than
pub const CMP_LT: u32 = 0x00000002;
/// Comparison operator for the pipeline: greater than
pub const CMP_GT: u32 = 0x00000004;
/// Comparison operator for the pipeline: less than or equal to (`CMP_EQ | CMP_LT`)
pub const CMP_LE: u32 = CMP_EQ | CMP_LT;
/// Comparison operator for the pipeline: greater than or equal to (`CMP_EQ | CMP_GT`)
pub const CMP_GE: u32 = CMP_EQ | CMP_GT;
22
/// Branch-prediction hint: calling a `#[cold]` function marks the enclosing
/// code path as unlikely, steering codegen/branch layout toward the common
/// (non-matching) path of the search loops below.
#[cold]
fn unlikely() {}
25
/// A context for a pipeline computation.
///
/// `begin` seeds a buffer set before its RoMix pass and `drain` consumes the
/// result afterwards; returning `Some(K)` from `drain` terminates the
/// pipeline early with that value.
///
/// It is implemented in this module for
/// `(&'a Align64<Block<R>>, &'b mut Align64<Block<R>>)` (copy the input block
/// in, copy the raw salt output back out).
// NOTE(review): an earlier version of this doc referenced `BlockU8<R>` tuple
// impls — confirm whether those impls still exist elsewhere in the crate.
pub trait PipelineContext<
    S,
    Q: AsRef<[Align64<Block<R>>]> + AsMut<[Align64<Block<R>>]>,
    R: ArrayLength + NonZero,
    K,
>
{
    /// Called to initialize each computation.
    fn begin(&mut self, state: &mut S, buffer_set: &mut BufferSet<Q, R>);

    /// Called to process the result of each computation.
    ///
    /// Returns `Some(K)` if the computation should be terminated.
    fn drain(self, state: &mut S, buffer_set: &mut BufferSet<Q, R>) -> Option<K>;
}
44
45impl<
46    'a,
47    'b,
48    S,
49    Q: AsRef<[Align64<Block<R>>]> + AsMut<[Align64<Block<R>>]>,
50    R: ArrayLength + NonZero,
51> PipelineContext<S, Q, R, ()> for (&'a Align64<Block<R>>, &'b mut Align64<Block<R>>)
52{
53    #[inline(always)]
54    fn begin(&mut self, _state: &mut S, buffer_set: &mut BufferSet<Q, R>) {
55        buffer_set.input_buffer_mut().copy_from_slice(self.0);
56    }
57
58    #[inline(always)]
59    fn drain(self, _state: &mut S, buffer_set: &mut BufferSet<Q, R>) -> Option<()> {
60        self.1.copy_from_slice(buffer_set.raw_salt_output());
61        None
62    }
63}
64
/// Brute force a masked test for a given target and nonce generator at a given offset with a compile-time R and a fixed P of 1.
///
/// Each candidate nonce is expanded to a PBKDF2-HMAC-SHA256 state and run
/// through RoMix, using the two buffer sets as a two-stage pipeline so one
/// candidate's back half overlaps the next candidate's front half. The
/// big-endian `u64` gathered at byte `offset` of the derived output is
/// masked with `mask` and compared against `target` using `OP` (one of the
/// `CMP_*` constants).
///
/// Returns the first `(nonce, hmac_state)` satisfying the comparison — the
/// returned HMAC state has already ingested the final salt — or `None` when
/// the generator is exhausted without a match.
///
/// # Panics
///
/// Panics if `OP` is not one of the supported `CMP_*` constants.
pub fn test_static<
    const OP: u32,
    Q: AsRef<[Align64<crate::fixed_r::Block<R>>]> + AsMut<[Align64<crate::fixed_r::Block<R>>]>,
    R: ArrayLength + NonZero,
    N: CreatePbkdf2HmacSha256State,
>(
    buffer_sets: [&mut crate::fixed_r::BufferSet<Q, R>; 2],
    salt: &[u8],
    mask: NonZeroU64,
    target: u64,
    offset: usize,
    nonce_generator: impl IntoIterator<Item = N>,
) -> Option<(N, Pbkdf2HmacSha256State)> {
    // Reject unsupported comparators eagerly instead of hitting the
    // `unreachable!` inside the hot drain path.
    match OP {
        CMP_EQ | CMP_LT | CMP_GT | CMP_LE | CMP_GE => {}
        _ => panic!("invalid OP: {}", OP),
    }

    // Read-only search parameters threaded through the pipeline callbacks.
    struct State<'a, R: ArrayLength + NonZero> {
        mask: NonZeroU64,
        target: u64,
        offset: usize,
        salt: &'a [u8],
        _marker: core::marker::PhantomData<R>,
    }

    let mut state = State {
        mask,
        target,
        offset,
        salt,
        _marker: core::marker::PhantomData,
    };

    // Per-candidate pipeline item: the nonce and its derived HMAC state.
    struct NonceState<R: ArrayLength + NonZero, const OP: u32, N> {
        nonce: N,
        hmac_state: Pbkdf2HmacSha256State,
        _marker: core::marker::PhantomData<R>,
    }

    impl<
        'a,
        const OP: u32,
        Q: AsRef<[Align64<crate::fixed_r::Block<R>>]> + AsMut<[Align64<crate::fixed_r::Block<R>>]>,
        R: ArrayLength + NonZero,
        N: CreatePbkdf2HmacSha256State,
    > PipelineContext<State<'a, R>, Q, R, (N, Pbkdf2HmacSha256State)> for NonceState<R, OP, N>
    {
        #[inline(always)]
        fn begin(
            &mut self,
            pipeline_state: &mut State<'a, R>,
            buffer_set: &mut crate::fixed_r::BufferSet<Q, R>,
        ) {
            // Seed the buffer set's RoMix input from this candidate's state.
            buffer_set.set_input(&self.hmac_state, &pipeline_state.salt);
        }

        #[inline(always)]
        fn drain(
            self,
            pipeline_state: &mut State<'a, R>,
            buffer_set: &mut crate::fixed_r::BufferSet<Q, R>,
        ) -> Option<(N, Pbkdf2HmacSha256State)> {
            // Gather only the 8 bytes at `offset` rather than emitting the
            // whole PBKDF2 output.
            let mut output = [0u8; 8];
            self.hmac_state.partial_gather(
                [buffer_set.raw_salt_output()],
                pipeline_state.offset,
                &mut output,
            );

            let t = u64::from_be_bytes(output) & pipeline_state.mask.get();

            // OP is a const generic, so this match folds to one comparison.
            let succeeded = match OP {
                CMP_EQ => t == pipeline_state.target,
                CMP_LT => t < pipeline_state.target,
                CMP_GT => t > pipeline_state.target,
                CMP_LE => t <= pipeline_state.target,
                CMP_GE => t >= pipeline_state.target,
                _ => unreachable!(),
            };
            if succeeded {
                unlikely();
                let mut output_hmac_state = self.hmac_state.clone();
                // SAFETY(review): reinterprets the raw salt output as a
                // one-element `Align64<Block<R>>` slice; assumes
                // `raw_salt_output()` is exactly one suitably aligned block —
                // confirm against the BufferSet contract.
                output_hmac_state.ingest_salt(unsafe {
                    core::slice::from_raw_parts(
                        buffer_set
                            .raw_salt_output()
                            .as_ptr()
                            .cast::<Align64<crate::fixed_r::Block<R>>>(),
                        1,
                    )
                });
                return Some((self.nonce, output_hmac_state));
            }

            None
        }
    }

    let [buffer_set0, buffer_set1] = buffer_sets;

    // Run the two-stage pipeline: while one buffer set finishes a candidate,
    // the other begins the next.
    buffer_set0.pipeline(
        buffer_set1,
        nonce_generator.into_iter().map(|i| NonceState::<R, OP, N> {
            hmac_state: i.create_pbkdf2_hmac_sha256_state(),
            nonce: i,
            _marker: core::marker::PhantomData,
        }),
        &mut state,
    )
}
177
/// Brute force a masked test for a given target and nonce generator at a given offset with a runtime R and P.
///
/// `buffer_sets` is scratch space and must hold at least
/// `2 * r * ((1 << cf) + 2)` unit (`U1`) blocks; it is split into two halves
/// that are ping-ponged so one chunk's RoMix back half overlaps the next
/// chunk's front half (software pipelining across chunks and candidates).
///
/// For each nonce, all `p` scrypt chunks are mixed and their salts folded
/// into an output HMAC state; the big-endian `u64` at byte `offset` of the
/// PBKDF2 output is masked with `mask` and compared to `target` with `OP`.
/// Returns the first matching `(nonce, hmac_state)` — the state has already
/// ingested the final salt, so the caller can emit the full scrypt output —
/// or `None` when the generator is exhausted.
///
/// # Panics
///
/// Panics if `OP` is not a supported `CMP_*` constant or if `buffer_sets`
/// is too small.
pub fn test<const OP: u32, N: CreatePbkdf2HmacSha256State>(
    buffer_sets: &mut [Align64<crate::fixed_r::Block<U1>>],
    cf: NonZeroU8,
    r: NonZeroU32,
    p: NonZeroU32,
    salt: &[u8],
    mask: NonZeroU64,
    target: u64,
    offset: usize,
    nonce_generator: impl IntoIterator<Item = N>,
) -> Option<(N, Pbkdf2HmacSha256State)> {
    // Reject unsupported comparators up front.
    match OP {
        CMP_EQ | CMP_LT | CMP_GT | CMP_LE | CMP_GE => {}
        _ => panic!("invalid OP: {}", OP),
    }

    // Split the scratch slice into two equal, non-overlapping working sets.
    let expected_len = (r.get() * ((1 << cf.get()) + 2)).try_into().unwrap();
    let [mut buffer_set0, mut buffer_set1] = buffer_sets
        .get_disjoint_mut([0..expected_len, expected_len..(expected_len * 2)])
        .expect("buffer_sets is not large enough, at least 2 * r * ((1 << cf) + 2) elements are required");

    let mut nonce_generator = nonce_generator.into_iter();

    // No candidates at all -> nothing to search.
    let mut current_nonce = nonce_generator.next()?;
    let mut current_hmac_state = current_nonce.create_pbkdf2_hmac_sha256_state();
    // Accumulates the per-chunk salts for the final PBKDF2 of this nonce.
    let mut output_hmac_state = current_hmac_state.clone();

    // prologue of the global pipeline - hydrate the leading buffer set
    current_hmac_state.emit_scatter(
        salt,
        buffer_set0
            .ro_mix_input_buffer(r)
            .chunks_exact_mut(core::mem::size_of::<Align64<crate::fixed_r::Block<U1>>>())
            // SAFETY(review): each chunk is exactly one Align64<Block<U1>>
            // in size; assumes the RoMix input buffer is block-aligned —
            // confirm against the RoMix buffer layout.
            .map(|chunk| unsafe {
                chunk
                    .as_mut_ptr()
                    .cast::<Align64<crate::fixed_r::Block<U1>>>()
                    .as_mut()
                    .unwrap()
            }),
    );
    buffer_set0.ro_mix_front(r, cf);

    loop {
        // complete the current chunk except the last RoMixBack
        for chunk_idx in 1..p.get() {
            // Scatter chunk `chunk_idx` of the current nonce into the
            // trailing buffer set while the leading one is still mixing.
            current_hmac_state.emit_scatter_offset(
                salt,
                buffer_set1
                    .ro_mix_input_buffer(r)
                    .chunks_exact_mut(core::mem::size_of::<Align64<crate::fixed_r::Block<U1>>>())
                    .map(|chunk| unsafe {
                        chunk
                            .as_mut_ptr()
                            .cast::<Align64<crate::fixed_r::Block<U1>>>()
                            .as_mut()
                            .unwrap()
                    }),
                chunk_idx * 4 * r.get(),
            );

            // Overlap the previous chunk's back half with this chunk's front
            // half; yields the previous chunk's salt.
            let salt = buffer_set0.ro_mix_interleaved(&mut buffer_set1, r, cf);

            // SAFETY(review): reinterprets the salt bytes as whole
            // Align64<Block<U1>> blocks; assumes the salt is block-aligned
            // and a whole number of blocks long — confirm RoMix contract.
            output_hmac_state.ingest_salt(unsafe {
                core::slice::from_raw_parts(
                    salt.as_ptr().cast::<Align64<crate::fixed_r::Block<U1>>>(),
                    salt.len() / core::mem::size_of::<Align64<crate::fixed_r::Block<U1>>>(),
                )
            });

            (buffer_set0, buffer_set1) = (buffer_set1, buffer_set0);
        }

        // figure out the next nonce and hmac state
        let (salt, new_state) = if let Some(next_nonce) = nonce_generator.next() {
            // Overlap the current nonce's final back half with the next
            // nonce's first front half.
            let new_hmac_state = next_nonce.create_pbkdf2_hmac_sha256_state();
            new_hmac_state.emit_scatter(
                salt,
                buffer_set1
                    .ro_mix_input_buffer(r)
                    .chunks_exact_mut(core::mem::size_of::<Align64<crate::fixed_r::Block<U1>>>())
                    .map(|chunk| unsafe {
                        chunk
                            .as_mut_ptr()
                            .cast::<Align64<crate::fixed_r::Block<U1>>>()
                            .as_mut()
                            .unwrap()
                    }),
            );

            (
                buffer_set0.ro_mix_interleaved(&mut buffer_set1, r, cf),
                Some((next_nonce, new_hmac_state)),
            )
        } else {
            // Last candidate: drain the pipeline with a plain back half.
            (buffer_set0.ro_mix_back(r, cf), None)
        };

        // Gather just the 8 bytes at `offset` and apply the masked compare.
        let mut tmp_output = [0u8; 8];

        output_hmac_state.partial_gather(
            salt.chunks_exact(core::mem::size_of::<Align64<crate::fixed_r::Block<U1>>>())
                .map(|block| unsafe {
                    block
                        .as_ptr()
                        .cast::<Align64<crate::fixed_r::Block<U1>>>()
                        .as_ref()
                        .unwrap()
                }),
            offset,
            &mut tmp_output,
        );
        let t = u64::from_be_bytes(tmp_output) & mask.get();

        // OP is a const generic; this folds to a single comparison.
        if match OP {
            CMP_EQ => t == target,
            CMP_LT => t < target,
            CMP_GT => t > target,
            CMP_LE => t <= target,
            CMP_GE => t >= target,
            _ => unreachable!(),
        } {
            unlikely();
            // Fold the final salt in so the returned state can emit the full
            // scrypt output directly.
            unsafe {
                output_hmac_state.ingest_salt(core::slice::from_raw_parts(
                    salt.as_ptr().cast::<Align64<crate::fixed_r::Block<U1>>>(),
                    salt.len() / core::mem::size_of::<Align64<crate::fixed_r::Block<U1>>>(),
                ));
            }
            return Some((current_nonce, output_hmac_state));
        }

        // Generator exhausted and no match: give up.
        let Some((next_nonce, new_hmac_state)) = new_state else {
            return None;
        };

        // Rotate to the next candidate and swap the buffer sets.
        {
            current_nonce = next_nonce;
            current_hmac_state = new_hmac_state;
            output_hmac_state = current_hmac_state.clone();

            (buffer_set0, buffer_set1) = (buffer_set1, buffer_set0);
        }
    }
}
324
#[cfg(test)]
mod tests {
    use generic_array::typenum::{U1, U2, U3, U4, U8, U16};

    use super::*;

    // Known-answer test: both the compile-time-R (`test_static`) and
    // runtime-R (`test`) searches must find the same expected nonce, and
    // must return None when the candidate window does not contain it.
    #[test]
    fn test_pow_kat() {
        let target = "0002";

        let cf = NonZeroU8::new(3).unwrap();
        let r = NonZeroU32::new(8).unwrap();
        let p = NonZeroU32::new(1).unwrap();

        let mut target_u64 = 0u64;
        let mut target_mask = 0u64;

        // Parse the hex target string into a value and a nibble mask.
        for nibble in target.as_bytes().iter() {
            let addend = match nibble {
                b'0'..=b'9' => nibble - b'0',
                b'A'..=b'F' => nibble - b'A' + 10,
                b'a'..=b'f' => nibble - b'a' + 10,
                _ => panic!("invalid nibble: {}", nibble),
            } as u64;

            target_u64 <<= 4;
            target_u64 |= addend;
            target_mask <<= 4;
            target_mask |= 15;
        }

        // Left-align value and mask within the 64-bit comparison window.
        target_u64 <<= (16 - target.len()) * 4;
        target_mask <<= (16 - target.len()) * 4;

        let expected_nonce = u64::from_le_bytes([0x11, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]);

        let mut buffer_sets = vec![
            Align64::<crate::fixed_r::Block<U1>>::default();
            2 * r.get() as usize * p.get() as usize * ((1 << cf.get()) + 2)
        ];
        // Slide candidate windows of varying offset and length over the
        // expected nonce; windows excluding it must yield None.
        for offset in -5..=5 {
            for len in 0..=8 {
                let includes_nonce = ((expected_nonce as i64 + offset)
                    ..(expected_nonce as i64 + offset + len as i64))
                    .contains(&(expected_nonce as i64));

                let static_result = test_static::<CMP_LE, _, U8, _>(
                    [
                        &mut *crate::fixed_r::BufferSet::new_boxed(cf),
                        &mut *crate::fixed_r::BufferSet::new_boxed(cf),
                    ],
                    &[0x29, 0x39, 0x66, 0x3c, 0x6f, 0x46, 0x15, 0xc3],
                    NonZeroU64::new(target_mask).unwrap(),
                    target_u64,
                    28 / 2,
                    ((expected_nonce as i64 + offset)..)
                        .map(|i| i as u64)
                        .take(len),
                );

                let dynamic_result = test::<CMP_LE, _>(
                    &mut buffer_sets,
                    cf,
                    r,
                    p,
                    &[0x29, 0x39, 0x66, 0x3c, 0x6f, 0x46, 0x15, 0xc3],
                    NonZeroU64::new(target_mask).unwrap(),
                    target_u64,
                    28 / 2,
                    ((expected_nonce as i64 + offset)..)
                        .map(|i| i as u64)
                        .take(len),
                );

                if !includes_nonce {
                    assert!(static_result.is_none(), "static_result is not none");
                    assert!(dynamic_result.is_none(), "dynamic_result is not none");
                    continue;
                }

                let (nonce, hmac_state_static) = static_result.unwrap();

                assert_eq!(nonce, expected_nonce);

                let (nonce, hmac_state_dynamic) = dynamic_result.unwrap();

                assert_eq!(nonce, expected_nonce);

                // Both search paths must agree on the resulting HMAC state.
                assert_eq!(hmac_state_static, hmac_state_dynamic);
            }
        }
    }

    // Cross-check the runtime-parameter search against the reference
    // `scrypt` crate for p = 1..=6.
    #[test]
    fn test_pow_high_p() {
        let target = "002";
        const SALT: &[u8] = &[0x29, 0x39, 0x66, 0x3c, 0x6f, 0x46, 0x15, 0xc3];
        for p in 1..=6 {
            let cf = NonZeroU8::new(3).unwrap();
            let r = NonZeroU32::new(8).unwrap();
            let p = NonZeroU32::new(p).unwrap();
            let params = scrypt::Params::new(cf.get(), r.get(), p.get(), 16).unwrap();

            let mut buffer_sets =
                vec![
                    Align64::<crate::fixed_r::Block<U1>>::default();
                    2 * r.get() as usize * p.get() as usize * ((1 << cf.get()) + 2)
                ];

            let mut target_u64 = 0u64;
            let mut target_mask = 0u64;

            // Parse the hex target string into a value and a nibble mask.
            for nibble in target.as_bytes().iter() {
                let addend = match nibble {
                    b'0'..=b'9' => nibble - b'0',
                    b'A'..=b'F' => nibble - b'A' + 10,
                    b'a'..=b'f' => nibble - b'a' + 10,
                    _ => panic!("invalid nibble: {}", nibble),
                } as u64;

                target_u64 <<= 4;
                target_u64 |= addend;
                target_mask <<= 4;
                target_mask |= 15;
            }

            // Expected attempt count for a uniformly distributed output;
            // the search budget below is 100x this.
            let expected_iterations = target_mask.div_ceil(target_u64 + 1);

            target_u64 <<= (16 - target.len()) * 4;
            target_mask <<= (16 - target.len()) * 4;

            let (nonce, hmac_state) = test::<CMP_LE, _>(
                &mut buffer_sets,
                cf,
                r,
                p,
                SALT,
                NonZeroU64::new(target_mask).unwrap(),
                target_u64,
                0,
                0..expected_iterations * 100,
            )
            .unwrap();

            let mut expected_output = [0u8; 16];

            // Reference scrypt output for the found nonce.
            scrypt::scrypt(&nonce.to_le_bytes(), SALT, &params, &mut expected_output).unwrap();

            let mut output = [0u8; 16];
            hmac_state.emit(&mut output);

            assert_eq!(output, expected_output);
            assert!(
                u64::from_be_bytes(output[0..8].try_into().unwrap()) & target_mask
                    <= u64::from_be_bytes([0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00])
            );
        }
    }

    // `test_static` and `test` must agree (same nonce and HMAC state) for
    // several targets at the given compile-time R.
    fn test_pow_consistency<R: ArrayLength + NonZero>() {
        for target in ["03", "005", "0030"] {
            let cf = NonZeroU8::new(3).unwrap();

            let mut target_u64 = 0u64;
            let mut target_mask = 0u64;

            // Parse the hex target string into a value and a nibble mask.
            for nibble in target.as_bytes().iter() {
                let addend = match nibble {
                    b'0'..=b'9' => nibble - b'0',
                    b'A'..=b'F' => nibble - b'A' + 10,
                    b'a'..=b'f' => nibble - b'a' + 10,
                    _ => panic!("invalid nibble: {}", nibble),
                } as u64;

                target_u64 <<= 4;
                target_u64 |= addend;
                target_mask <<= 4;
                target_mask |= 15;
            }

            let expected_iterations = target_mask.div_ceil(target_u64 + 1);

            target_u64 <<= (16 - target.len()) * 4;
            target_mask <<= (16 - target.len()) * 4;

            let mut buffer_sets = vec![
                Align64::<crate::fixed_r::Block<U1>>::default();
                2 * R::USIZE * 1 as usize * ((1 << cf.get()) + 2)
            ];
            let static_result = test_static::<CMP_LE, _, R, _>(
                [
                    &mut *crate::fixed_r::BufferSet::new_boxed(cf),
                    &mut *crate::fixed_r::BufferSet::new_boxed(cf),
                ],
                &[0x29, 0x39, 0x66, 0x3c, 0x6f, 0x46, 0x15, 0xc3],
                NonZeroU64::new(target_mask).unwrap(),
                target_u64,
                28 / 2,
                0..expected_iterations * 100,
            );

            let dynamic_result = test::<CMP_LE, _>(
                &mut buffer_sets,
                cf,
                R::U32.try_into().unwrap(),
                1.try_into().unwrap(),
                &[0x29, 0x39, 0x66, 0x3c, 0x6f, 0x46, 0x15, 0xc3],
                NonZeroU64::new(target_mask).unwrap(),
                target_u64,
                28 / 2,
                0..expected_iterations * 100,
            );

            let (nonce_static, hmac_state_static) = static_result.unwrap();

            let (nonce_dynamic, hmac_state_dynamic) = dynamic_result.unwrap();

            assert_eq!(nonce_static, nonce_dynamic);
            assert_eq!(hmac_state_static, hmac_state_dynamic);
        }
    }

    #[test]
    fn test_pow_consistency_r1() {
        test_pow_consistency::<U1>();
    }

    #[test]
    fn test_pow_consistency_r2() {
        test_pow_consistency::<U2>();
    }

    #[test]
    fn test_pow_consistency_r3() {
        test_pow_consistency::<U3>();
    }

    #[test]
    fn test_pow_consistency_r4() {
        test_pow_consistency::<U4>();
    }

    #[test]
    fn test_pow_consistency_r8() {
        test_pow_consistency::<U8>();
    }

    #[test]
    fn test_pow_consistency_r16() {
        test_pow_consistency::<U16>();
    }
}