Skip to main content

pow_buster/solver/
sha_ni.rs

1#[cfg(target_arch = "x86_64")]
2use core::arch::x86_64::*;
3
4#[cfg(target_arch = "x86")]
5use core::arch::x86::*;
6
7use crate::{
8    Align16, SWAP_DWORD_BYTE_ORDER, decompose_blocks_mut,
9    message::{DecimalMessage, DoubleBlockMessage, GoAwayMessage, SingleBlockMessage},
10};
11
12cpufeatures::new!(sse41sha, "sha", "sse4.1");
13
/// Zero-sized marker naming the CPU features the SHA-NI solvers in this module rely on.
///
/// It carries no data; it exists so the feature check can be attached to a type.
#[derive(Debug, Copy, Clone)]
pub struct RequiredFeatures;
17
18impl Default for RequiredFeatures {
19    fn default() -> Self {
20        Self
21    }
22}
23
// Ties the marker type to the `sse41sha` detection module declared with `cpufeatures::new!`
// near the top of this file (features "sha" and "sse4.1").
24impl crate::solver::CpuIDToken for RequiredFeatures {
    /// Returns whatever `sse41sha::get()` reports for the "sha" + "sse4.1" feature pair.
25    fn get() -> bool {
26        sse41sha::get()
27    }
28}
29
30/// SHA-NI decimal nonce single block solver.
31///
32///
33/// Current implementation: 4 way multi-issue with 4-round hotstart granularity.
34pub struct SingleBlockSolver {
35    message: SingleBlockMessage,
36
37    attempted_nonces: u64,
38
39    limit: u64,
40}
41
42impl From<super::safe::SingleBlockSolver> for SingleBlockSolver {
43    fn from(solver: super::safe::SingleBlockSolver) -> Self {
44        Self {
45            message: solver.message,
46            attempted_nonces: solver.attempted_nonces,
47            limit: solver.limit,
48        }
49    }
50}
51
52impl From<SingleBlockMessage> for SingleBlockSolver {
53    fn from(message: SingleBlockMessage) -> Self {
54        Self {
55            message,
56            attempted_nonces: 0,
57            limit: u64::MAX,
58        }
59    }
60}
61
62impl crate::solver::Solver for SingleBlockSolver {
63    fn set_limit(&mut self, limit: u64) {
64        self.limit = limit;
65    }
66
67    fn get_attempted_nonces(&self) -> u64 {
68        self.attempted_nonces
69    }
70    fn solve<const TYPE: u8>(&mut self, target: u64, mask: u64) -> Option<(u64, [u32; 8])> {
71        if self.message.no_trailing_zeros {
72            self.solve_impl::<TYPE, true>(target, mask)
73        } else {
74            self.solve_impl::<TYPE, false>(target, mask)
75        }
76    }
77}
78
79impl SingleBlockSolver {
    /// Prepares the message block and dispatches to the `solve_inner` instantiation that
    /// matches `self.message.digit_index`.
    ///
    /// The nine bytes starting at `digit_index` are reset to ASCII `'0'` (the ninth becomes
    /// `'1'` when `NO_TRAILING_ZEROS` is set). If `solve_inner` finds a nonce, the block is
    /// hashed once more with `crate::sha256::digest_block` starting from `prefix_state`, and
    /// the result is `(nonce + nonce_addend, final state)`. Returns `None` when `solve_inner`
    /// returns `None`.
80    fn solve_impl<const TYPE: u8, const NO_TRAILING_ZEROS: bool>(
81        &mut self,
82        target: u64,
83        mask: u64,
84    ) -> Option<(u64, [u32; 8])> {
        // Index of the 32-bit word that holds the first lane digit.
85        let lane_id_0_word_idx = self.message.digit_index / 4;
86
        // Mask the target once up front; the equality comparison masks the hash side later.
87        let target = target & mask;
88        {
89            let message = decompose_blocks_mut(&mut self.message.message);
90            for i in (self.message.digit_index..).take(9) {
91                message[SWAP_DWORD_BYTE_ORDER[i]] = b'0';
92            }
93            if NO_TRAILING_ZEROS {
                // Start from a last digit of '1' so the very first candidate does not end in zero.
94                message[SWAP_DWORD_BYTE_ORDER[self.message.digit_index + 8]] = b'1';
95            }
96        }
97
        // Index of the 32-bit word that holds the second lane digit (may be the next word).
98        let lane_id_1_word_idx = (self.message.digit_index + 1) / 4;
99
        // Turns runtime facts about `digit_index` into const generic arguments:
        // * 4-argument arm: picks ON_REGISTER_BOUNDARY from `digit_index % 4 == 2`.
        // * 3-argument arm: picks DIGIT_WORD_IDX1_INC from whether the two lane digits sit in
        //   different words, then forwards to the 4-argument arm.
100        macro_rules! dispatch {
101            ($idx0_0:literal, $idx0_1:literal, $idx0_2:literal, $lane_id_1_word_idx_inc:literal) => {
102                unsafe {
103                    if self.message.digit_index % 4 == 2 {
104                        self.solve_inner::<
105                                                            { $idx0_0 },
106                                                            { $idx0_1 },
107                                                            { $idx0_2 },
108                                                            { $lane_id_1_word_idx_inc },
109                                                            TYPE,
110                                                            NO_TRAILING_ZEROS,
111                                                            true,
112                                                        >(target, mask)
113                    } else {
114                        self.solve_inner::<
115                                                            { $idx0_0 },
116                                                            { $idx0_1 },
117                                                            { $idx0_2 },
118                                                            { $lane_id_1_word_idx_inc },
119                                                            TYPE,
120                                                            NO_TRAILING_ZEROS,
121                                                            false,
122                                                        >(target, mask)
123                    }
124                }
125            };
126            ($idx0_0:literal, $idx0_1:literal, $idx0_2:literal) => {
127                if lane_id_1_word_idx == lane_id_0_word_idx {
128                    dispatch!($idx0_0, $idx0_1, $idx0_2, false)
129                } else {
130                    dispatch!($idx0_0, $idx0_1, $idx0_2, true)
131                }
132            };
133        }
134
        // For word index w the three literals are (w / 4 * 4, w / 4, w % 4).
135        let nonce = match lane_id_0_word_idx {
136            0 => dispatch!(0, 0, 0),
137            1 => dispatch!(0, 0, 1),
138            2 => dispatch!(0, 0, 2),
139            3 => dispatch!(0, 0, 3),
140            4 => dispatch!(4, 1, 0),
141            5 => dispatch!(4, 1, 1),
142            6 => dispatch!(4, 1, 2),
143            7 => dispatch!(4, 1, 3),
144            8 => dispatch!(8, 2, 0),
145            9 => dispatch!(8, 2, 1),
146            10 => dispatch!(8, 2, 2),
147            11 => dispatch!(8, 2, 3),
148            12 => dispatch!(12, 3, 0),
149            13 => dispatch!(12, 3, 1),
            // NOTE(review): a word index above 13 is undefined behavior here; presumably
            // `SingleBlockMessage` guarantees `digit_index < 56` - confirm at construction.
150            _ => unsafe { core::hint::unreachable_unchecked() },
151        }?;
152
        // `solve_inner` leaves the winning digits in the message, so hashing the block again
        // yields the full final state for the caller.
153        let mut final_sha_state = self.message.prefix_state;
154        crate::sha256::digest_block(&mut final_sha_state, &self.message.message);
155
156        Some((nonce + self.message.nonce_addend, final_sha_state))
157    }
158
    /// Search loop for one fixed position of the nonce inside the block.
    ///
    /// Const parameters, as supplied by `solve_impl`'s `dispatch!` macro:
    /// * `DIGIT_WORD_IDX0_DIV_4_TIMES_4`, `DIGIT_WORD_IDX0_DIV_4`, `DIGIT_WORD_IDX0_MOD_4` -
    ///   the word index `w` of the first lane digit as `w / 4 * 4`, `w / 4` and `w % 4`.
    /// * `DIGIT_WORD_IDX1_INC` - `true` when the second lane digit lives in word `w + 1`.
    /// * `TYPE` - comparison kind (`SOLVE_TYPE_GT`, `SOLVE_TYPE_LT`, anything else means masked
    ///   equality).
    /// * `NO_TRAILING_ZEROS` - skip inner keys that are multiples of ten.
    /// * `ON_REGISTER_BOUNDARY` - `digit_index % 4 == 2`.
    ///
    /// Four lanes are hashed per iteration; each lane uses a different two-digit prefix
    /// (10..=97 overall) and all share the same seven-digit inner key. On a hit the two lane
    /// digits are written back into the message and the function returns
    /// `prefix * 10^7 + inner_key` (without `nonce_addend`). Returns `None` when
    /// `attempted_nonces` reaches `limit` or every prefix/key combination has been tried.
    ///
    /// Compiled with `sse4.1` and `sha` enabled; `solve_impl` calls it from an `unsafe` block.
159    #[inline(never)]
160    #[target_feature(enable = "sse4.1,sha")]
161    fn solve_inner<
162        const DIGIT_WORD_IDX0_DIV_4_TIMES_4: usize,
163        const DIGIT_WORD_IDX0_DIV_4: usize,
164        const DIGIT_WORD_IDX0_MOD_4: usize,
165        const DIGIT_WORD_IDX1_INC: bool,
166        const TYPE: u8,
167        const NO_TRAILING_ZEROS: bool,
168        const ON_REGISTER_BOUNDARY: bool,
169    >(
170        &mut self,
171        target: u64,
172        mask: u64,
173    ) -> Option<u64> {
        // Feed the leading DIGIT_WORD_IDX0_DIV_4_TIMES_4 message words into a copy of the
        // prefix state once, outside the hot loop.
        // NOTE(review): presumably this runs the SHA-256 rounds for those words - confirm
        // against `crate::sha256::ingest_message_prefix`.
174        let mut partial_state = Align16(self.message.prefix_state);
175        crate::sha256::ingest_message_prefix::<{ DIGIT_WORD_IDX0_DIV_4_TIMES_4 }>(
176            &mut partial_state,
177            core::array::from_fn(|i| self.message.message[i]),
178        );
179        let prepared_state = crate::sha256::sha_ni::prepare_state(&partial_state);
        // Byte positions (0..=3) of the two lane digits inside their respective words.
180        let lane_id_0_byte_idx = self.message.digit_index % 4;
181        let lane_id_1_byte_idx = (self.message.digit_index + 1) % 4;
182
183        // move AB into position for feedback
        // NOTE(review): the `get_unchecked` accesses inside this block assume
        // `digit_index + 8` stays within SWAP_DWORD_BYTE_ORDER and the message bytes - confirm.
184        unsafe {
            // prefix_state[0] goes into the high and prefix_state[1] into the low half of one
            // 64-bit value; the shuffle then moves that value into the upper 64 bits of the
            // vector and zeroes the lower 64 bits.
185            let feedback_ab = {
186                let lows = _mm_cvtsi64x_si128(
187                    ((self.message.prefix_state[0] as u64) << 32
188                        | self.message.prefix_state[1] as u64) as _,
189                );
190
191                _mm_shuffle_epi32(lows, 0b01001010)
192            };
193
            // Each outer iteration covers four consecutive two-digit prefixes:
            // 10..=13, 14..=17, ..., 94..=97.
194            for nonce_prefix_start in (10u32..=96).step_by(4) {
                // Two ASCII digits packed into the low 16 bits: tens digit in bits 8..16,
                // ones digit in bits 0..8.
195                const fn to_ascii_u32(input: u32) -> u32 {
196                    let high_digit = input / 10;
197                    let low_digit = input % 10;
198                    u32::from_be_bytes([0, 0, high_digit as u8 + b'0', low_digit as u8 + b'0'])
199                }
200                let lane_index_values = [
201                    to_ascii_u32(nonce_prefix_start),
202                    to_ascii_u32(nonce_prefix_start + 1),
203                    to_ascii_u32(nonce_prefix_start + 2),
204                    to_ascii_u32(nonce_prefix_start + 3),
205                ];
206
                // Ones digit of each lane, shifted to its byte position within its word.
207                let lane_id_1_or_value = core::array::from_fn(|i| {
208                    (lane_index_values[i] & 0xff) << ((3 - lane_id_1_byte_idx) * 8)
209                });
210
                // Tens digit of each lane in position; when both digits share a word the ones
                // digit is merged in so a single OR covers both.
211                let lane_id_0_or_value = core::array::from_fn(|i| {
212                    let mut r = (lane_index_values[i] >> 8) << ((3 - lane_id_0_byte_idx) * 8);
213                    if !DIGIT_WORD_IDX1_INC {
214                        r |= lane_id_1_or_value[i]
215                    }
216                    r
217                });
218
                // Supplies the per-lane OR masks for the message words that hold the lane digits.
219                struct LaneIdPlucker<
220                    'a,
221                    const DIGIT_WORD_IDX0_DIV_4: usize,
222                    const DIGIT_WORD_IDX0_MOD_4: usize,
223                    const DIGIT_WORD_IDX1_INC: bool,
224                > {
225                    lane_0_or_value: &'a [u32; 4],
226                    lane_1_or_value: &'a [u32; 4],
227                }
228
229                impl<
230                    'a,
231                    const DIGIT_WORD_IDX0_DIV_4: usize,
232                    const DIGIT_WORD_IDX0_MOD_4: usize,
233                    const DIGIT_WORD_IDX1_INC: bool,
234                >
235                    LaneIdPlucker<
236                        'a,
237                        DIGIT_WORD_IDX0_DIV_4,
238                        DIGIT_WORD_IDX0_MOD_4,
239                        DIGIT_WORD_IDX1_INC,
240                    >
241                {
                    /// OR mask for message word `idx` in `lane`: the first-digit mask for the
                    /// first digit's word, the second-digit mask for the following word when
                    /// the digits are split across words, and zero for every other word.
242                    #[inline(always)]
243                    fn fetch_msg_or(&self, idx: usize, lane: usize) -> u32 {
244                        if idx == DIGIT_WORD_IDX0_DIV_4 * 4 + DIGIT_WORD_IDX0_MOD_4 {
245                            self.lane_0_or_value[lane]
246                        } else if DIGIT_WORD_IDX1_INC
247                            && idx == DIGIT_WORD_IDX0_DIV_4 * 4 + DIGIT_WORD_IDX0_MOD_4 + 1
248                        {
249                            self.lane_1_or_value[lane]
250                        } else {
251                            0
252                        }
253                    }
254                }
255
                // Each `pluck_qwordN` ORs the lane masks for message words 4N..=4N+3 into `w`.
256                impl<
257                    'a,
258                    const DIGIT_WORD_IDX0_DIV_4: usize,
259                    const DIGIT_WORD_IDX0_MOD_4: usize,
260                    const DIGIT_WORD_IDX1_INC: bool,
261                > crate::sha256::sha_ni::Plucker
262                    for LaneIdPlucker<
263                        'a,
264                        DIGIT_WORD_IDX0_DIV_4,
265                        DIGIT_WORD_IDX0_MOD_4,
266                        DIGIT_WORD_IDX1_INC,
267                    >
268                {
269                    #[inline(always)]
270                    fn pluck_qword0(&mut self, lane: usize, w: &mut __m128i) {
271                        unsafe {
272                            *w = _mm_or_si128(
273                                *w,
274                                _mm_setr_epi32(
275                                    self.fetch_msg_or(0, lane) as _,
276                                    self.fetch_msg_or(1, lane) as _,
277                                    self.fetch_msg_or(2, lane) as _,
278                                    self.fetch_msg_or(3, lane) as _,
279                                ),
280                            );
281                        }
282                    }
283                    #[inline(always)]
284                    fn pluck_qword1(&mut self, lane: usize, w: &mut __m128i) {
285                        unsafe {
286                            *w = _mm_or_si128(
287                                *w,
288                                _mm_setr_epi32(
289                                    self.fetch_msg_or(4, lane) as _,
290                                    self.fetch_msg_or(5, lane) as _,
291                                    self.fetch_msg_or(6, lane) as _,
292                                    self.fetch_msg_or(7, lane) as _,
293                                ),
294                            );
295                        }
296                    }
297                    #[inline(always)]
298                    fn pluck_qword2(&mut self, lane: usize, w: &mut __m128i) {
299                        unsafe {
300                            *w = _mm_or_si128(
301                                *w,
302                                _mm_setr_epi32(
303                                    self.fetch_msg_or(8, lane) as _,
304                                    self.fetch_msg_or(9, lane) as _,
305                                    self.fetch_msg_or(10, lane) as _,
306                                    self.fetch_msg_or(11, lane) as _,
307                                ),
308                            );
309                        }
310                    }
311                    #[inline(always)]
312                    fn pluck_qword3(&mut self, lane: usize, w: &mut __m128i) {
313                        unsafe {
314                            *w = _mm_or_si128(
315                                *w,
316                                _mm_setr_epi32(
317                                    self.fetch_msg_or(12, lane) as _,
318                                    self.fetch_msg_or(13, lane) as _,
319                                    self.fetch_msg_or(14, lane) as _,
320                                    self.fetch_msg_or(15, lane) as _,
321                                ),
322                            );
323                        }
324                    }
325                }
326
                // Scratch buffer for the AVX2 digit formatter; only read on the path where
                // the digits do not start on a word boundary.
327                #[cfg(target_feature = "avx2")]
328                let mut itoa_buf = if NO_TRAILING_ZEROS && ON_REGISTER_BOUNDARY {
329                    Align16(*b"0000\x80100")
330                } else {
331                    Align16(*b"0000\x80000")
332                };
333
                // `next_inner_key` is the key written into the message at the end of the
                // current iteration; the key being hashed is the one already in the block
                // (0, or 1 with NO_TRAILING_ZEROS, on the first pass).
334                let mut next_inner_key = if NO_TRAILING_ZEROS { 2 } else { 1 };
335                while next_inner_key <= 10_000_000 {
336                    let mut state0 = prepared_state;
337                    let mut state1 = prepared_state;
338                    let mut state2 = prepared_state;
339                    let mut state3 = prepared_state;
340
                    // Process the rest of the block for all four lanes at once; the plucker
                    // injects each lane's prefix digits into the shared message words.
341                    crate::sha256::sha_ni::multiway_arx_abef_cdgh::<{ DIGIT_WORD_IDX0_DIV_4 }, 4, _>(
342                        [&mut state0, &mut state1, &mut state2, &mut state3],
343                        (&self.message.message).into(),
344                        LaneIdPlucker::<
345                            DIGIT_WORD_IDX0_DIV_4,
346                            DIGIT_WORD_IDX0_MOD_4,
347                            DIGIT_WORD_IDX1_INC,
348                        > {
349                            lane_0_or_value: &lane_id_0_or_value,
350                            lane_1_or_value: &lane_id_1_or_value,
351                        },
352                    );
353
354                    // paddd is basically free on modern CPUs so do the feedback unconditionally
355                    state0[0] = _mm_add_epi32(state0[0], feedback_ab);
356                    state1[0] = _mm_add_epi32(state1[0], feedback_ab);
357                    state2[0] = _mm_add_epi32(state2[0], feedback_ab);
358                    state3[0] = _mm_add_epi32(state3[0], feedback_ab);
359
                    // `x` is a lane's 64-bit result, `y` the pre-masked target. Only the
                    // equality branch applies `mask` to `x`; GT/LT compare the raw value.
360                    let cmp_fn = |x: &u64, y: &u64| {
361                        if TYPE == crate::solver::SOLVE_TYPE_GT {
362                            x > y
363                        } else if TYPE == crate::solver::SOLVE_TYPE_LT {
364                            x < y
365                        } else {
366                            x & mask == y & mask
367                        }
368                    };
369
                    // Upper 64 bits of each lane's first state vector (where the feedback was
                    // added); the first lane that satisfies the comparison wins.
370                    let success_lane_idx = {
371                        let result_abs = [
372                            _mm_extract_epi64(state0[0], 1) as u64,
373                            _mm_extract_epi64(state1[0], 1) as u64,
374                            _mm_extract_epi64(state2[0], 1) as u64,
375                            _mm_extract_epi64(state3[0], 1) as u64,
376                        ];
377
378                        result_abs.iter().position(|x| cmp_fn(x, &target))
379                    };
380
381                    if let Some(success_lane_idx) = success_lane_idx {
382                        crate::unlikely();
383
384                        let nonce_prefix = nonce_prefix_start + success_lane_idx as u32;
385
386                        // stamp the lane ID back onto the message
387                        {
388                            let message_bytes = decompose_blocks_mut(&mut self.message.message);
389                            *message_bytes.get_unchecked_mut(
390                                *SWAP_DWORD_BYTE_ORDER.get_unchecked(self.message.digit_index),
391                            ) = (nonce_prefix / 10) as u8 + b'0';
392                            *message_bytes.get_unchecked_mut(
393                                *SWAP_DWORD_BYTE_ORDER.get_unchecked(self.message.digit_index + 1),
394                            ) = (nonce_prefix % 10) as u8 + b'0';
395                        }
396
                        // The key that was just hashed is the one written on the previous
                        // iteration, i.e. one below `next_inner_key` (stepping over a multiple
                        // of ten when those are skipped).
397                        let mut prev_inner_key = next_inner_key - 1;
398                        if NO_TRAILING_ZEROS && prev_inner_key % 10 == 0 {
399                            prev_inner_key -= 1;
400                        }
401
402                        return Some(nonce_prefix as u64 * 10u64.pow(7) + prev_inner_key);
403                    }
404
                    // Never write a key ending in zero when trailing zeros are disallowed.
405                    if NO_TRAILING_ZEROS && next_inner_key % 10 == 0 {
406                        next_inner_key += 1;
407                    }
408
                    // Write the seven inner digits of `next_inner_key` into the message.
409                    #[cfg(target_feature = "avx2")]
410                    {
411                        if ON_REGISTER_BOUNDARY {
                            // The inner digits start at the word after the lane digits, so
                            // they are formatted directly into the message.
                            // NOTE(review): the pointer is only guaranteed word-aligned; if
                            // `Align16` is `repr(align(16))` this `&mut` may be misaligned and
                            // may span past the end of the block - confirm against `Align16`
                            // and `simd_itoa8`.
412                            crate::strings::simd_itoa8::<7, true, 0x80>(
413                                self.message
414                                    .message
415                                    .as_mut_ptr()
416                                    .add(DIGIT_WORD_IDX0_DIV_4 * 4 + DIGIT_WORD_IDX0_MOD_4 + 1)
417                                    .cast::<Align16<[u8; 8]>>()
418                                    .as_mut()
419                                    .unwrap(),
420                                next_inner_key as u32,
421                            );
422                        } else {
                            // Format into the scratch buffer, then scatter the seven digits
                            // through the byte-order table.
423                            crate::strings::simd_itoa8::<7, false, 0x80>(
424                                &mut itoa_buf,
425                                next_inner_key as u32,
426                            );
427                            for i in 0..7 {
428                                let message_bytes = decompose_blocks_mut(&mut self.message.message);
429                                *message_bytes.get_unchecked_mut(
430                                    *SWAP_DWORD_BYTE_ORDER
431                                        .get_unchecked(self.message.digit_index + i + 2),
432                                ) = itoa_buf[i];
433                            }
434                        }
435                    }
436
                    // Scalar fallback: peel decimal digits off the low end and store them from
                    // the last digit position (digit_index + 8) back to the first (digit_index + 2).
437                    #[cfg(not(target_feature = "avx2"))]
438                    {
439                        let mut key_copy = next_inner_key;
440                        {
441                            let message_bytes = decompose_blocks_mut(&mut self.message.message);
442
443                            for i in (0..7).rev() {
444                                let output = key_copy % 10;
445                                key_copy /= 10;
446                                *message_bytes.get_unchecked_mut(
447                                    *SWAP_DWORD_BYTE_ORDER
448                                        .get_unchecked(self.message.digit_index + i + 2),
449                                ) = output as u8 + b'0';
450                            }
451                        }
452                    }
453
                    // Four lanes were hashed this iteration.
454                    self.attempted_nonces += 4;
455                    if self.attempted_nonces >= self.limit {
456                        return None;
457                    }
458                    next_inner_key += 1;
459                }
460            }
461        }
462        None
463    }
464}
465
466/// SHA-NI decimal nonce double block solver.
467///
468///
469/// Current implementation: 4 way multi-issue with 4-round hotstart granularity.
470pub struct DoubleBlockSolver {
471    message: DoubleBlockMessage,
472    attempted_nonces: u64,
473
474    limit: u64,
475}
476
477impl From<super::safe::DoubleBlockSolver> for DoubleBlockSolver {
478    fn from(solver: super::safe::DoubleBlockSolver) -> Self {
479        Self {
480            message: solver.message,
481            attempted_nonces: solver.attempted_nonces,
482            limit: solver.limit,
483        }
484    }
485}
486
487impl From<DoubleBlockMessage> for DoubleBlockSolver {
488    fn from(message: DoubleBlockMessage) -> Self {
489        Self {
490            message,
491            attempted_nonces: 0,
492            limit: u64::MAX,
493        }
494    }
495}
496
497impl crate::solver::Solver for DoubleBlockSolver {
498    fn set_limit(&mut self, limit: u64) {
499        self.limit = limit;
500    }
501
502    fn get_attempted_nonces(&self) -> u64 {
503        self.attempted_nonces
504    }
505    fn solve<const TYPE: u8>(&mut self, target: u64, mask: u64) -> Option<(u64, [u32; 8])> {
506        unsafe { self.solve_impl::<TYPE>(target, mask) }
507    }
508}
509
510impl DoubleBlockSolver {
    /// Searches for a nine-digit decimal nonce whose two-block SHA-256 result satisfies the
    /// comparison selected by `TYPE` against `target & mask`.
    ///
    /// The nonce digits start at byte `DoubleBlockMessage::DIGIT_IDX` of the first block. Four
    /// lanes are hashed per iteration; each lane uses a different two-digit prefix (10..=97
    /// overall) OR-ed into message word 13, while all lanes share the seven inner digits held
    /// in words 14 and 15. The second block (`terminal_message`) only carries the message bit
    /// length in words 14 and 15.
    ///
    /// On success the winning words 13..=15 are written back to `self.message.message`, both
    /// blocks are rehashed with `crate::sha256::digest_block`, and the function returns
    /// `(prefix * 10^7 + suffix + nonce_addend, final state)`. Returns `None` when
    /// `attempted_nonces` has reached `limit` (checked on entry and after every iteration) or
    /// the whole search space was tried.
    ///
    /// Compiled with `sse4.1` and `sha` enabled; the `Solver::solve` impl calls it from an
    /// `unsafe` block.
511    #[target_feature(enable = "sse4.1,sha")]
512    fn solve_impl<const TYPE: u8>(&mut self, target: u64, mask: u64) -> Option<(u64, [u32; 8])> {
513        if self.attempted_nonces >= self.limit {
514            return None;
515        }
516
        // Mask the target once up front; the equality comparison masks the hash side later.
517        let target = target & mask;
518
        // Reset the nine digit bytes to ASCII '0'.
519        for i in (DoubleBlockMessage::DIGIT_IDX as usize..).take(9) {
520            let message = decompose_blocks_mut(&mut self.message.message);
521            message[SWAP_DWORD_BYTE_ORDER[i]] = b'0';
522        }
523
        // `iv_state` is the untouched prefix state, later added back after the first block;
        // `prepared_state` is the prefix state after `sha2_arx::<0>` over message words 0..12.
        // NOTE(review): presumably `sha2_arx` runs the rounds for those twelve words without
        // the final feed-forward - confirm against `crate::sha256`.
524        let iv_state = crate::sha256::sha_ni::prepare_state(&self.message.prefix_state);
525        let mut prefix_state = Align16(self.message.prefix_state);
526        crate::sha256::sha2_arx::<0>(&mut prefix_state, &self.message.message[..12]);
527        let prepared_state = crate::sha256::sha_ni::prepare_state(&prefix_state);
528
        // Second block: all zero except the 64-bit bit length split over words 14 (high half)
        // and 15 (low half).
529        let mut terminal_message = Align16([0; 16]);
530        terminal_message[14] = ((self.message.message_length * 8) >> 32) as u32;
531        terminal_message[15] = (self.message.message_length * 8) as u32;
532
        // Each outer iteration covers four consecutive two-digit prefixes:
        // 10..=13, 14..=17, ..., 94..=97.
533        for nonce_prefix_start in (10u32..=96).step_by(4) {
534            unsafe {
                // Two ASCII digits packed into the low 16 bits: tens digit in bits 8..16,
                // ones digit in bits 0..8.
535                const fn to_ascii_u32(input: u32) -> u32 {
536                    let high_digit = input / 10;
537                    let low_digit = input % 10;
538                    u32::from_be_bytes([0, 0, high_digit as u8 + b'0', low_digit as u8 + b'0'])
539                }
                // Complete word 13 for each lane: the lane's two digits OR-ed with the
                // existing content of message word 13.
540                let lane_index_value_v = [
541                    to_ascii_u32(nonce_prefix_start) | self.message.message[13],
542                    to_ascii_u32(nonce_prefix_start + 1) | self.message.message[13],
543                    to_ascii_u32(nonce_prefix_start + 2) | self.message.message[13],
544                    to_ascii_u32(nonce_prefix_start + 3) | self.message.message[13],
545                ];
546
547                for inner_key in 0..10_000_000 {
548                    let mut states0 = prepared_state;
549                    let mut states1 = prepared_state;
550                    let mut states2 = prepared_state;
551                    let mut states3 = prepared_state;
552
                    // Encode the seven digits of `inner_key` in reversed order: the least
                    // significant decimal digit ends up in the most significant byte of
                    // `cum0`. Adding digit values to ASCII "0000" via OR works because
                    // '0' is 0x30 and digits are below 16.
553                    let mut key_copy = inner_key;
554                    let mut cum0 = 0;
555                    for _ in 0..4 {
556                        cum0 <<= 8;
557                        cum0 |= key_copy % 10;
558                        key_copy /= 10;
559                    }
560                    cum0 |= u32::from_be_bytes(*b"0000");
                    // Remaining three digits fill the top three bytes of `cum1`; the low byte
                    // is the 0x80 byte that follows the last digit.
561                    let mut cum1 = 0;
562                    for _ in 0..3 {
563                        cum1 += key_copy % 10;
564                        cum1 <<= 8;
565                        key_copy /= 10;
566                    }
567                    cum1 |= u32::from_be_bytes(*b"000\x80");
568
                    // Tell the optimizer that `inner_key` has at most seven digits.
569                    if key_copy != 0 {
570                        debug_assert_eq!(key_copy, 0);
571                        core::hint::unreachable_unchecked();
572                    }
573
                    // First block as hashed: words 0..13 from the message, word 13 left zero
                    // for the plucker to fill per lane, words 14/15 with the inner digits.
574                    let mut msg0 = Align16([0; 16]);
575                    msg0[..13].copy_from_slice(self.message.message[..13].try_into().unwrap());
576                    msg0[14] = cum0;
577                    msg0[15] = cum1;
578
                    // ORs the lane's word-13 value into the second element of the last
                    // four-word group (words 12..=15).
579                    struct LaneIdPlucker<'a> {
580                        lane_index_value_v: &'a [u32; 4],
581                    }
582                    impl<'a> crate::sha256::sha_ni::Plucker for LaneIdPlucker<'a> {
583                        #[inline(always)]
584                        fn pluck_qword3(&mut self, lane: usize, w: &mut __m128i) {
585                            *w = unsafe {
586                                _mm_or_si128(
587                                    *w,
588                                    _mm_setr_epi32(0, self.lane_index_value_v[lane] as _, 0, 0),
589                                )
590                            };
591                        }
592                    }
593
                    // Finish the first block for all four lanes.
594                    crate::sha256::sha_ni::multiway_arx_abef_cdgh::<3, 4, LaneIdPlucker>(
595                        [&mut states0, &mut states1, &mut states2, &mut states3],
596                        &msg0,
597                        LaneIdPlucker {
598                            lane_index_value_v: &lane_index_value_v,
599                        },
600                    );
601
                    // Add the block's input state back onto every state vector of each lane.
602                    for s in [&mut states0, &mut states1, &mut states2, &mut states3] {
603                        s.iter_mut()
604                            .zip(iv_state.iter())
605                            .for_each(|(state, iv_state)| {
606                                *state = _mm_add_epi32(*state, *iv_state);
607                            });
608                    }
609
                    // Keep each lane's first state vector: it is the feedback for block two.
610                    let save_abs = [states0[0], states1[0], states2[0], states3[0]];
611
612                    // this isn't really SIMD so we can't really amortize the cost of fetching message schedule
613                    // so let's compute it with sha-ni
614                    crate::sha256::sha_ni::multiway_arx_abef_cdgh::<0, 4, _>(
615                        [&mut states0, &mut states1, &mut states2, &mut states3],
616                        &terminal_message,
617                        (),
618                    );
619
620                    states0[0] = _mm_add_epi32(states0[0], save_abs[0]);
621                    states1[0] = _mm_add_epi32(states1[0], save_abs[1]);
622                    states2[0] = _mm_add_epi32(states2[0], save_abs[2]);
623                    states3[0] = _mm_add_epi32(states3[0], save_abs[3]);
624
                    // Upper 64 bits of each lane's first state vector are what gets compared.
625                    let final_abs = [
626                        _mm_extract_epi64(states0[0], 1) as u64,
627                        _mm_extract_epi64(states1[0], 1) as u64,
628                        _mm_extract_epi64(states2[0], 1) as u64,
629                        _mm_extract_epi64(states3[0], 1) as u64,
630                    ];
631
                    // `x` is a lane's 64-bit result, `y` the pre-masked target. Only the
                    // equality branch applies `mask` to `x`; GT/LT compare the raw value.
632                    let cmp_fn = |x: &u64, y: &u64| {
633                        if TYPE == crate::solver::SOLVE_TYPE_GT {
634                            x > y
635                        } else if TYPE == crate::solver::SOLVE_TYPE_LT {
636                            x < y
637                        } else {
638                            x & mask == y & mask
639                        }
640                    };
641
642                    let success_lane_idx = final_abs.iter().position(|x| cmp_fn(x, &target));
643
644                    if let Some(success_lane_idx) = success_lane_idx {
645                        crate::unlikely();
646
                        // Persist the winning digits (words 13..=15) into the stored message.
647                        let nonce_prefix = nonce_prefix_start + success_lane_idx as u32;
648                        self.message.message[13] = lane_index_value_v[success_lane_idx];
649                        self.message.message[14] = cum0;
650                        self.message.message[15] = cum1;
651
652                        // recompute the hash from the beginning
653                        // this saves the compiler from having to keep the final B-H registers alive in tight loops
654                        let mut final_sha_state = self.message.prefix_state;
655                        crate::sha256::digest_block(&mut final_sha_state, &self.message.message);
656                        crate::sha256::digest_block(&mut final_sha_state, &terminal_message);
657
658                        // reverse the byte order
                        // (the digits were emitted least-significant first, so the number as
                        // it reads in the message is `inner_key` with its seven digits reversed)
659                        let mut nonce_suffix = 0;
660                        let mut key_copy = inner_key;
661                        for _ in 0..7 {
662                            nonce_suffix *= 10;
663                            nonce_suffix += key_copy % 10;
664                            key_copy /= 10;
665                        }
666
667                        let computed_nonce = nonce_prefix as u64 * 10u64.pow(7)
668                            + nonce_suffix as u64
669                            + self.message.nonce_addend;
670
671                        // the nonce is the 8 digits in the message, plus the first two digits recomputed from the lane index
672                        return Some((computed_nonce, *final_sha_state));
673                    }
674
                    // Four lanes were hashed this iteration.
675                    self.attempted_nonces += 4;
676
677                    if self.attempted_nonces >= self.limit {
678                        return None;
679                    }
680                }
681            }
682        }
        // Every prefix/key combination was tried without a hit.
683        crate::unlikely();
684
685        None
686    }
687}
688
// Loads the `impl_decimal_solver!` macro from the sibling file `impl_decimal_solver.rs` and
// invokes it for this module's two decimal solvers.
// NOTE(review): presumably this generates a `DecimalSolver` type that wraps
// `SingleBlockSolver` and `DoubleBlockSolver` - confirm in impl_decimal_solver.rs.
689#[macro_use]
690#[path = "impl_decimal_solver.rs"]
691mod impl_decimal_solver;
692
693impl_decimal_solver!(
694    [SingleBlockSolver, DoubleBlockSolver] => DecimalSolver
695);
696
697/// SHA-NI GoAway solver.
698///
699///
700/// Current implementation: 4 way multi-issued solver with 4-round hotstart granularity.
701pub struct GoAwaySolver {
702    message: GoAwayMessage,
703    attempted_nonces: u64,
704    limit: u64,
705}
706
707impl From<super::safe::GoAwaySolver> for GoAwaySolver {
708    fn from(solver: super::safe::GoAwaySolver) -> Self {
709        Self {
710            message: solver.message,
711            attempted_nonces: solver.attempted_nonces,
712            limit: solver.limit,
713        }
714    }
715}
716
717impl From<GoAwayMessage> for GoAwaySolver {
718    fn from(message: GoAwayMessage) -> Self {
719        Self {
720            message,
721            attempted_nonces: 0,
722            limit: u64::MAX,
723        }
724    }
725}
726
727impl GoAwaySolver {
728    const MSG_LEN: u32 = 10 * 4 * 8;
729}
730
731impl crate::solver::Solver for GoAwaySolver {
732    fn set_limit(&mut self, limit: u64) {
733        self.limit = limit;
734    }
735
736    fn get_attempted_nonces(&self) -> u64 {
737        self.attempted_nonces
738    }
739
740    fn solve<const TYPE: u8>(&mut self, target: u64, mask: u64) -> Option<(u64, [u32; 8])> {
741        unsafe { self.solve_impl::<TYPE>(target, mask) }
742    }
743}
744
745impl GoAwaySolver {
746    #[target_feature(enable = "sse4.1,sha")]
747    fn solve_impl<const TYPE: u8>(&mut self, target: u64, mask: u64) -> Option<(u64, [u32; 8])> {
748        #[allow(unused_unsafe)]
749        unsafe {
750            let target = target & mask;
751
752            let mut prefix_state = Align16(crate::sha256::IV);
753            crate::sha256::ingest_message_prefix(&mut prefix_state, self.message.challenge);
754            let prepared_state = crate::sha256::sha_ni::prepare_state(&prefix_state);
755
756            let feedback_ab = {
757                let lows = _mm_cvtsi64x_si128(
758                    ((crate::sha256::IV[0] as u64) << 32 | crate::sha256::IV[1] as u64) as _,
759                );
760
761                _mm_shuffle_epi32(lows, 0b01001010)
762            };
763
764            {
765                for low_word in (0..=u32::MAX).step_by(4) {
766                    let mut states0 = prepared_state;
767                    let mut states1 = prepared_state;
768                    let mut states2 = prepared_state;
769                    let mut states3 = prepared_state;
770
771                    let mut msg0 = Align16([0; 16]);
772                    msg0[0..8].copy_from_slice(&self.message.challenge);
773                    msg0[8] = self.message.high_word;
774                    msg0[9] = low_word;
775                    msg0[10] = u32::from_be_bytes([0x80, 0, 0, 0]);
776                    msg0[15] = Self::MSG_LEN as _;
777
778                    struct LaneIdPlucker;
779                    impl crate::sha256::sha_ni::Plucker for LaneIdPlucker {
780                        #[inline(always)]
781                        fn pluck_qword2(&mut self, lane: usize, w: &mut __m128i) {
782                            *w = unsafe { _mm_or_si128(*w, _mm_setr_epi32(0, lane as _, 0, 0)) };
783                        }
784                    }
785
786                    crate::sha256::sha_ni::multiway_arx_abef_cdgh::<2, 4, _>(
787                        [&mut states0, &mut states1, &mut states2, &mut states3],
788                        &msg0,
789                        LaneIdPlucker,
790                    );
791
792                    states0[0] = _mm_add_epi32(states0[0], feedback_ab);
793                    states1[0] = _mm_add_epi32(states1[0], feedback_ab);
794                    states2[0] = _mm_add_epi32(states2[0], feedback_ab);
795                    states3[0] = _mm_add_epi32(states3[0], feedback_ab);
796
797                    let result_abs = [
798                        _mm_extract_epi64(states0[0], 1) as u64,
799                        _mm_extract_epi64(states1[0], 1) as u64,
800                        _mm_extract_epi64(states2[0], 1) as u64,
801                        _mm_extract_epi64(states3[0], 1) as u64,
802                    ];
803
804                    let cmp_fn = |x: &u64, y: &u64| {
805                        if TYPE == crate::solver::SOLVE_TYPE_GT {
806                            x > y
807                        } else if TYPE == crate::solver::SOLVE_TYPE_LT {
808                            x < y
809                        } else {
810                            x & mask == y & mask
811                        }
812                    };
813
814                    let success_lane_idx = result_abs.iter().position(|x| cmp_fn(x, &target));
815
816                    self.attempted_nonces += 4;
817
818                    if let Some(success_lane_idx) = success_lane_idx {
819                        crate::unlikely();
820
821                        let mut output_msg: [u32; 16] = [0; 16];
822
823                        let final_low_word = low_word | (success_lane_idx as u32);
824                        output_msg[..8].copy_from_slice(&self.message.challenge);
825                        output_msg[8] = self.message.high_word;
826                        output_msg[9] = final_low_word;
827                        output_msg[10] = u32::from_be_bytes([0x80, 0, 0, 0]);
828                        output_msg[15] = Self::MSG_LEN as _;
829
830                        let mut final_sha_state = crate::sha256::IV;
831                        crate::sha256::digest_block(&mut final_sha_state, &output_msg);
832
833                        return Some((
834                            (self.message.high_word as u64) << 32 | final_low_word as u64,
835                            final_sha_state,
836                        ));
837                    }
838
839                    if self.attempted_nonces >= self.limit {
840                        return None;
841                    }
842                }
843            }
844        }
845
846        crate::unlikely();
847        None
848    }
849}
850
// These tests execute SHA-NI code directly, so they are only built when the
// compilation target itself enables both required features.
#[cfg(all(target_feature = "sse4.1", target_feature = "sha"))]
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the shared decimal validator, preferring the single-block solver
    /// and falling back to the double-block one when the message does not fit.
    #[test]
    fn test_solve_decimal() {
        crate::solver::tests::test_decimal_validator::<DecimalSolver, _>(|prefix, search_space| {
            match SingleBlockMessage::new(prefix, search_space) {
                Some(message) => Some(DecimalSolver::SingleBlock(message.into())),
                None => DoubleBlockMessage::new(prefix, search_space).map(Into::into),
            }
        });
    }

    /// Same as `test_solve_decimal`, but through the `f64`-safe validator and
    /// the `new_f64` single-block constructor.
    #[test]
    fn test_solve_decimal_f64() {
        crate::solver::tests::test_decimal_validator_f64_safe::<DecimalSolver, _>(
            |prefix, search_space| match SingleBlockMessage::new_f64(prefix, search_space) {
                Some((message, p)) => Some((DecimalSolver::SingleBlock(message.into()), p)),
                None => DoubleBlockMessage::new(prefix, search_space)
                    .map(|message| (DecimalSolver::DoubleBlock(message.into()), None)),
            },
        );
    }

    /// Runs the shared GoAway validator against the SHA-NI solver.
    #[test]
    fn test_solve_goaway() {
        crate::solver::tests::test_goaway_validator::<GoAwaySolver, _>(|prefix| {
            // Pack the prefix bytes into big-endian 32-bit challenge words.
            let challenge = core::array::from_fn(|i| {
                let at = i * 4;
                u32::from_be_bytes([prefix[at], prefix[at + 1], prefix[at + 2], prefix[at + 3]])
            });

            GoAwaySolver::from(GoAwayMessage::new(challenge, 0))
        });
    }
}