Skip to main content

pow_buster/
message.rs

1#![allow(clippy::inconsistent_digit_grouping)]
2#![allow(clippy::collapsible_if)]
3use core::num::NonZeroU8;
4
5use crate::{Align16, Align64, blake3, sha256};
6
7/// Solves an mCaptcha/Anubis/Cap.js SHA256 PoW where the SHA-256 message is a single block (512 bytes minus padding).
8///
9/// Construct: Proof := (prefix || ASCII_DECIMAL(nonce))
10///
11/// Currently the mutating part is always 9 digits long.
12#[derive(Debug, Clone)]
13pub struct SingleBlockMessage {
14    /// the message template for the final block, pre-padded except for the mutating part
15    pub message: Align64<[u32; 16]>,
16
17    /// the SHA-256 midstate for the previous block
18    pub prefix_state: [u32; 8],
19
20    /// the index of the mutating part of the digits in the message
21    pub digit_index: usize,
22
23    /// the nonce addend
24    pub nonce_addend: u64,
25
26    /// the approximate working set count
27    pub approx_working_set_count: core::num::NonZeroU32,
28
29    /// whether no trailing zeros are allowed
30    pub no_trailing_zeros: bool,
31}
32
33#[derive(Debug, Clone, Copy)]
34/// a prefix for stretching nonces whose vallidation accepts IEEE 754 double precision floats
35///
36/// This allows JS numbers to be stretched as long as a regular u64 integer allowing code reuse.
37pub struct IEEE754LosslessFixupPrefix {
38    buf: [u8; 9],
39    cut: usize,
40}
41
42impl IEEE754LosslessFixupPrefix {
43    /// fixes up a nonce before sending it to the server
44    #[inline(always)]
45    pub const fn fixup(&self, nonce: u64) -> f64 {
46        let mut decimals = 0.00000_000_000_001 * nonce as f64;
47        let tens = self.buf[1] - b'0';
48        let ones = self.buf[2] - b'0';
49        decimals += (tens * 10 + ones) as f64;
50        if self.cut == 0 {
51            decimals = -decimals;
52        }
53        decimals
54    }
55}
56
57impl AsRef<[u8]> for IEEE754LosslessFixupPrefix {
58    fn as_ref(&self) -> &[u8] {
59        &self.buf[self.cut..]
60    }
61}
62
63impl SingleBlockMessage {
64    /// creates a new single block message
65    pub fn new(mut prefix: &[u8], mut working_set: u32) -> Option<Self> {
66        // construct the message buffer
67        let mut prefix_state = sha256::IV;
68        let mut nonce_addend = 0u64;
69        let mut complete_blocks_before = 0;
70        let mut approx_working_set_count = 1u32;
71
72        // first consume all full blocks, this is shared so use scalar reference implementation
73        while prefix.len() >= 64 {
74            sha256::digest_block(
75                &mut prefix_state,
76                &core::array::from_fn(|i| {
77                    u32::from_be_bytes([
78                        prefix[i * 4],
79                        prefix[i * 4 + 1],
80                        prefix[i * 4 + 2],
81                        prefix[i * 4 + 3],
82                    ])
83                }),
84            );
85            prefix = &prefix[64..];
86            complete_blocks_before += 1;
87        }
88
89        let mut is_fitst_digit = true;
90        let mut pop_padding_digit = || {
91            if is_fitst_digit {
92                is_fitst_digit = false;
93                1u8
94            } else {
95                approx_working_set_count = approx_working_set_count.saturating_mul(10);
96                let digit = working_set % 10;
97                working_set /= 10;
98                digit as u8
99            }
100        };
101
102        // greedy padding logic
103
104        // priority 0: if there is not enough room for 9 bytes of padding, pad with '1's and then start a new block whenever possible
105        // this avoids having to hash 2 blocks per iteration a naive solution would do
106        if prefix.len() + 9 + 9 > 64 {
107            let mut tmp_block = [0; 64];
108            tmp_block[..prefix.len()].copy_from_slice(prefix);
109            tmp_block[prefix.len()..].iter_mut().for_each(|b| {
110                let pad = pop_padding_digit();
111                nonce_addend *= 10;
112                nonce_addend += pad as u64;
113                *b = b'0' + pad;
114            });
115            nonce_addend.checked_mul(1_000_000_000)?; // make sure we still have enough headroom
116            complete_blocks_before += 1;
117            prefix = &[];
118            sha256::digest_block(
119                &mut prefix_state,
120                &core::array::from_fn(|i| {
121                    u32::from_be_bytes([
122                        tmp_block[i * 4],
123                        tmp_block[i * 4 + 1],
124                        tmp_block[i * 4 + 2],
125                        tmp_block[i * 4 + 3],
126                    ])
127                }),
128            );
129        }
130
131        let mut message: [u8; 64] = [0; 64];
132        let mut ptr = 0;
133        message[..prefix.len()].copy_from_slice(prefix);
134        ptr += prefix.len();
135
136        // we used to not do these more subtle optimizations as it is not typical for mCaptcha
137        // but all Anubis deployments start at offset 0, so there is very good incentive to micro-optimize
138        if ptr <= 35 {
139            // priority 1: try to pad to an even position to minimize the need to poke 2 words for the lane ID
140            if ptr % 2 == 1 {
141                if nonce_addend.checked_mul(10_000_000_000 * 2).is_some() {
142                    nonce_addend *= 10;
143                    let pad = pop_padding_digit();
144                    nonce_addend += pad as u64;
145                    message[ptr] = b'0' + pad;
146                    ptr += 1;
147                }
148            }
149            // priority 2: try to pad such that the inner nonce is at a register boundary and PSHUFD shortcut can be used (minus the lane ID)
150            while (ptr + 2) % 4 != 0 {
151                if nonce_addend.checked_mul(10_000_000_000 * 2).is_some() {
152                    nonce_addend *= 10;
153                    let pad = pop_padding_digit();
154                    nonce_addend += pad as u64;
155                    message[ptr] = b'0' + pad;
156                    ptr += 1;
157                } else {
158                    break;
159                }
160            }
161            // priority 3: try to move the mutating part into later part of the final block to skim a couple rounds
162            // times 2 because for some reason anubis uses signed nonces ... I wonder if we can send negative nonces
163            while nonce_addend
164                .checked_mul(10000 * 1_000_000_000 * 2)
165                .is_some()
166            {
167                let pad0 = pop_padding_digit();
168                let pad1 = pop_padding_digit();
169                let pad2 = pop_padding_digit();
170                let pad3 = pop_padding_digit();
171                nonce_addend *= 10000;
172                nonce_addend +=
173                    pad0 as u64 * 1000 + pad1 as u64 * 100 + pad2 as u64 * 10 + pad3 as u64;
174                message[ptr] = b'0' + pad0;
175                message[ptr + 1] = b'0' + pad1;
176                message[ptr + 2] = b'0' + pad2;
177                message[ptr + 3] = b'0' + pad3;
178                ptr += 4;
179            }
180        }
181        // a double block solver must be used because not enough digits can bridge the 9 byte overhead
182        nonce_addend = nonce_addend.checked_mul(1_000_000_000)?;
183
184        let digit_index = ptr;
185
186        // skip 9 zeroes, this is the part we will interpolate N into
187        // the first 2 digits are used as the lane index (10 + (0..16)*(0..4), offset to avoid leading zeroes), this also keeps our proof plausible
188        // the rest are randomly generated then broadcasted to all lanes
189        // this gives us about 16e7 * 4 possible attempts, likely enough for any realistic deployment even on the highest difficulty
190        // the fail rate would be pgeom(keySpace, 1/difficulty, lower=F) in R
191        ptr += 9;
192
193        // set up padding
194        message[ptr] = 0x80;
195        message[(64 - 8)..]
196            .copy_from_slice(&((complete_blocks_before * 64 + ptr) as u64 * 8).to_be_bytes());
197
198        if working_set != 0 {
199            return None;
200        }
201
202        Some(Self {
203            message: Align64(core::array::from_fn(|i| {
204                u32::from_be_bytes([
205                    message[i * 4],
206                    message[i * 4 + 1],
207                    message[i * 4 + 2],
208                    message[i * 4 + 3],
209                ])
210            })),
211            prefix_state,
212            digit_index,
213            nonce_addend,
214            approx_working_set_count: approx_working_set_count.try_into().unwrap(),
215            no_trailing_zeros: false,
216        })
217    }
218
219    /// Create a new single block message using only IEEE 754 double precision floats that can stringify losslessly
220    ///
221    /// The caller is expected to append the bytes from the prefix to the nonce before sending it to the server.
222    pub fn new_f64(
223        mut prefix: &[u8],
224        mut working_set: u32,
225    ) -> Option<(Self, Option<IEEE754LosslessFixupPrefix>)> {
226        // construct the message buffer
227        let mut prefix_state = sha256::IV;
228        let mut nonce_addend = 0u64;
229        let mut complete_blocks_before = 0;
230        let mut approx_working_set_count = 1;
231
232        // first consume all full blocks, this is shared so use scalar reference implementation
233        while prefix.len() >= 64 {
234            sha256::digest_block(
235                &mut prefix_state,
236                &core::array::from_fn(|i| {
237                    u32::from_be_bytes([
238                        prefix[i * 4],
239                        prefix[i * 4 + 1],
240                        prefix[i * 4 + 2],
241                        prefix[i * 4 + 3],
242                    ])
243                }),
244            );
245            prefix = &prefix[64..];
246            complete_blocks_before += 1;
247        }
248
249        let mut message: [u8; 64] = [0; 64]; // the final message
250        let mut ptr = 0;
251        let mut fixup_prefix = None;
252
253        if (55..=57).contains(&prefix.len()) {
254            // a separate program is ran to make sure these numbers
255            // parse and stringifies losslessly
256            if working_set >= 32 {
257                return None;
258            }
259            let working_set_msb = working_set / 10;
260            let working_set_lsb = working_set % 10;
261            let mut tmp_block = [0; 64];
262            tmp_block[..prefix.len()].copy_from_slice(prefix);
263            let space = 64 - prefix.len();
264            let mut padding: IEEE754LosslessFixupPrefix = IEEE754LosslessFixupPrefix {
265                buf: *b"-10.00000",
266                cut: 0,
267            };
268            if prefix.len() == 56 {
269                padding.cut = 1;
270            }
271            let mut padding_buf = padding.buf;
272            padding_buf[1] = b'1' + working_set_msb as u8;
273            padding_buf[2] = b'0' + working_set_lsb as u8;
274            let pad = &padding_buf.as_slice()[padding.cut..];
275            let (pad_left, pad_right) = pad.split_at(space);
276            tmp_block[prefix.len()..].copy_from_slice(pad_left);
277            sha256::digest_block(
278                &mut prefix_state,
279                &core::array::from_fn(|i| {
280                    u32::from_be_bytes([
281                        tmp_block[i * 4],
282                        tmp_block[i * 4 + 1],
283                        tmp_block[i * 4 + 2],
284                        tmp_block[i * 4 + 3],
285                    ])
286                }),
287            );
288            prefix = &[];
289            complete_blocks_before += 1;
290            message[..pad_right.len()].copy_from_slice(pad_right);
291            ptr += pad_right.len();
292            working_set = 0;
293            approx_working_set_count = 32 - working_set;
294            fixup_prefix = Some(padding);
295        }
296
297        let mut is_fitst_digit = true;
298        let mut pop_padding_digit = || {
299            if is_fitst_digit {
300                is_fitst_digit = false;
301                1u8
302            } else {
303                approx_working_set_count = approx_working_set_count.saturating_mul(10);
304                let digit = working_set % 10;
305                working_set /= 10;
306                digit as u8
307            }
308        };
309
310        // greedy padding logic
311
312        // priority 0: if there is not enough room for 9 bytes of padding, pad with '0.(...)'s and then start a new block whenever possible
313        // this avoids having to hash 2 blocks per iteration a naive solution would do
314        if prefix.len() + 9 + 9 > 64 {
315            let mut tmp_block = [0; 64];
316            tmp_block[..prefix.len()].copy_from_slice(prefix);
317            tmp_block[prefix.len()..].iter_mut().for_each(|b| {
318                let pad = pop_padding_digit();
319                nonce_addend *= 10;
320                nonce_addend += pad as u64;
321                *b = b'0' + pad;
322            });
323            nonce_addend.checked_mul(1_000_000_000)?; // make sure we still have enough headroom
324            complete_blocks_before += 1;
325            prefix = &[];
326            sha256::digest_block(
327                &mut prefix_state,
328                &core::array::from_fn(|i| {
329                    u32::from_be_bytes([
330                        tmp_block[i * 4],
331                        tmp_block[i * 4 + 1],
332                        tmp_block[i * 4 + 2],
333                        tmp_block[i * 4 + 3],
334                    ])
335                }),
336            );
337        }
338
339        message[..prefix.len()].copy_from_slice(prefix);
340        ptr += prefix.len();
341
342        // we used to not do these more subtle optimizations as it is not typical for mCaptcha
343        // but all Anubis deployments start at offset 0, so there is very good incentive to micro-optimize
344        if ptr <= 35 && fixup_prefix.is_none() {
345            // priority 1: try to pad to an even position to minimize the need to poke 2 words for the lane ID
346            if ptr % 2 == 1 {
347                if nonce_addend
348                    .checked_mul(10_000_000_000 * 2)
349                    .filter(|x| *x < 1_000_000_000_000_000)
350                    .is_some()
351                {
352                    nonce_addend *= 10;
353                    let pad = pop_padding_digit();
354                    nonce_addend += pad as u64;
355                    message[ptr] = b'0' + pad;
356                    ptr += 1;
357                }
358            }
359            // priority 2: try to pad such that the inner nonce is at a register boundary and PSHUFD shortcut can be used (minus the lane ID)
360            while (ptr + 2) % 4 != 0 {
361                if nonce_addend
362                    .checked_mul(10_000_000_000 * 2)
363                    .filter(|x| *x < 1_000_000_000_000_000)
364                    .is_some()
365                {
366                    nonce_addend *= 10;
367                    let pad = pop_padding_digit();
368                    nonce_addend += pad as u64;
369                    message[ptr] = b'0' + pad;
370                    ptr += 1;
371                } else {
372                    break;
373                }
374            }
375            // priority 3: try to move the mutating part into later part of the final block to skim a couple rounds
376            // times 2 because for some reason anubis uses signed nonces ... I wonder if we can send negative nonces
377            while nonce_addend
378                .checked_mul(10000 * 1_000_000_000 * 2)
379                .filter(|x| *x < 1_000_000_000_000_000)
380                .is_some()
381            {
382                let pad0 = pop_padding_digit();
383                let pad1 = pop_padding_digit();
384                let pad2 = pop_padding_digit();
385                let pad3 = pop_padding_digit();
386                nonce_addend *= 10000;
387                nonce_addend +=
388                    pad0 as u64 * 1000 + pad1 as u64 * 100 + pad2 as u64 * 10 + pad3 as u64;
389                message[ptr] = b'0' + pad0;
390                message[ptr + 1] = b'0' + pad1;
391                message[ptr + 2] = b'0' + pad2;
392                message[ptr + 3] = b'0' + pad3;
393                ptr += 4;
394            }
395        }
396        // a double block solver must be used because not enough digits can bridge the 9 byte overhead
397        nonce_addend = nonce_addend
398            .checked_mul(1_000_000_000)
399            .filter(|x| *x < 1_000_000_000_000_000)?;
400
401        let digit_index = ptr;
402
403        // skip 9 zeroes, this is the part we will interpolate N into
404        // the first 2 digits are used as the lane index (10 + (0..16)*(0..4), offset to avoid leading zeroes), this also keeps our proof plausible
405        // the rest are randomly generated then broadcasted to all lanes
406        // this gives us about 16e7 * 4 possible attempts, likely enough for any realistic deployment even on the highest difficulty
407        // the fail rate would be pgeom(keySpace, 1/difficulty, lower=F) in R
408        ptr += 9;
409
410        // set up padding
411        message[ptr] = 0x80;
412        message[(64 - 8)..]
413            .copy_from_slice(&((complete_blocks_before * 64 + ptr) as u64 * 8).to_be_bytes());
414
415        if working_set != 0 {
416            return None;
417        }
418
419        Some((
420            Self {
421                message: Align64(core::array::from_fn(|i| {
422                    u32::from_be_bytes([
423                        message[i * 4],
424                        message[i * 4 + 1],
425                        message[i * 4 + 2],
426                        message[i * 4 + 3],
427                    ])
428                })),
429                prefix_state,
430                digit_index,
431                nonce_addend,
432                approx_working_set_count: approx_working_set_count.try_into().unwrap(),
433                no_trailing_zeros: fixup_prefix.is_some(),
434            },
435            fixup_prefix,
436        ))
437    }
438}
439
440/// Solves an mCaptcha/Anubis/Cap.js SHA256 PoW where the SHA-256 message is a double block (1024 bytes minus padding).
441///
442/// Construct: Proof := (prefix || '1' * k || ASCII_DECIMAL(nonce) || '\x80') | ('\0' * 56 + length)
443///
444/// Currently the mutating part is always 9 digits long.
445pub struct DoubleBlockMessage {
446    /// the message template for the final block, pre-padded except for the mutating part
447    pub message: Align64<[u32; 16]>,
448
449    /// the SHA-256 midstate for the previous block
450    pub prefix_state: Align16<[u32; 8]>,
451
452    /// the length of the message in bytes
453    pub message_length: u64,
454
455    /// the nonce addend
456    pub nonce_addend: u64,
457}
458
459impl DoubleBlockMessage {
460    /// the index of the mutating part of the digits in the message
461    pub const DIGIT_IDX: u64 = 54;
462
463    /// creates a new double block message
464    pub fn new(mut prefix: &[u8], mut working_set: u32) -> Option<Self> {
465        // construct the message buffer
466        let mut prefix_state = crate::Align16(sha256::IV);
467
468        let mut complete_blocks_before = 0;
469
470        let mut is_fitst_digit = true;
471        let mut pop_padding_digit = || {
472            if is_fitst_digit {
473                is_fitst_digit = false;
474                1u8
475            } else {
476                let digit = working_set % 10;
477                working_set /= 10;
478                digit as u8
479            }
480        };
481
482        // first consume all full blocks, this is shared so use scalar reference implementation
483        while prefix.len() >= 64 {
484            sha256::digest_block(
485                &mut prefix_state,
486                &core::array::from_fn(|i| {
487                    u32::from_be_bytes([
488                        prefix[i * 4],
489                        prefix[i * 4 + 1],
490                        prefix[i * 4 + 2],
491                        prefix[i * 4 + 3],
492                    ])
493                }),
494            );
495            prefix = &prefix[64..];
496            complete_blocks_before += 1;
497        }
498
499        let mut message: [u8; 64] = [0; 64];
500        let mut ptr = 0;
501        message[..prefix.len()].copy_from_slice(prefix);
502        ptr += prefix.len();
503
504        // pad with ones until we are on a 64-bit boundary minus 2 byte
505        // we have much more leeway here as we are committed to a double block solver, using more bytes is fine, there is nothing useful to be traded off
506        // so we will construct and solve exactly this format, for lane 12 and nonce 3456789:
507        // [prefix + '1' * k + '12' + '3456' + '789\x80'] | ['\0' * 56 + length]
508        let mut nonce_addend = 0;
509        while (ptr + 2) % 8 != 0 {
510            nonce_addend *= 10;
511            let pad = pop_padding_digit();
512            nonce_addend += pad as u64;
513            *message.get_mut(ptr)? = b'0' + pad;
514            ptr += 1;
515        }
516        nonce_addend *= 1_000_000_000;
517
518        // these cases are handled by the single block solver
519        if ptr != Self::DIGIT_IDX as usize {
520            return None;
521        }
522
523        if working_set != 0 {
524            return None;
525        }
526
527        // skip 9 zeroes, this is the part we will interpolate N into
528        // the first 2 digits are used as the lane index (10 + (0..16)*(0..4), offset to avoid leading zeroes)
529        // the rest are randomly generated then broadcasted to all lanes
530        // this gives us about 16e7 * 4 possible attempts, likely enough for any realistic deployment even on the highest difficulty
531        // the fail rate would be pgeom(keySpace, 1/difficulty, lower=F) in R
532        ptr += 9;
533
534        // we should be at the end of the message buffer minus 1
535        debug_assert_eq!(ptr, 63);
536
537        message[ptr] = 0x80;
538
539        let message_length = complete_blocks_before * 64 + ptr as u64;
540
541        Some(Self {
542            prefix_state,
543            message: Align64(core::array::from_fn(|i| {
544                u32::from_be_bytes([
545                    message[i * 4],
546                    message[i * 4 + 1],
547                    message[i * 4 + 2],
548                    message[i * 4 + 3],
549                ])
550            })),
551            nonce_addend,
552            message_length,
553        })
554    }
555}
556
557/// A wrapper that handles both cases for decimal nonces
558pub enum DecimalMessage {
559    /// A single block message
560    SingleBlock(SingleBlockMessage),
561
562    /// A double block message
563    DoubleBlock(DoubleBlockMessage),
564}
565
566impl DecimalMessage {
567    /// creates a new decimal message
568    pub fn new(input: &[u8], working_set: u32) -> Option<Self> {
569        SingleBlockMessage::new(input, working_set)
570            .map(Self::SingleBlock)
571            .or_else(|| DoubleBlockMessage::new(input, working_set).map(Self::DoubleBlock))
572    }
573
574    /// creates a new decimal message using only IEEE 754 double precision floats that can stringify losslessly
575    pub fn new_f64(
576        input: &[u8],
577        working_set: u32,
578    ) -> Option<(Self, Option<IEEE754LosslessFixupPrefix>)> {
579        SingleBlockMessage::new_f64(input, working_set)
580            .map(|(message, fixup_prefix)| (Self::SingleBlock(message), fixup_prefix))
581            .or_else(|| {
582                DoubleBlockMessage::new(input, working_set).map(|x| (Self::DoubleBlock(x), None))
583            })
584    }
585}
586
587/// Binary message with a fixed layout
588#[derive(Debug, Clone)]
589pub struct BinaryMessage {
590    /// the SHA-256 midstate for the previous block
591    pub prefix_state: Align16<[u32; 8]>,
592
593    /// the residual salt
594    pub salt_residual: [u8; 64],
595
596    /// the length of the residual salt
597    pub salt_residual_len: usize,
598
599    /// the number of bytes of the nonce
600    /// Guaranteed to be less or equal to 8
601    pub nonce_byte_count: NonZeroU8,
602
603    /// message length in bytes
604    pub message_length: usize,
605}
606
607impl BinaryMessage {
608    /// creates a new binary message
609    pub fn new(salt: &[u8], nonce_byte_count: NonZeroU8) -> Self {
610        assert!(
611            nonce_byte_count.get() <= 8,
612            "nonce_byte_count must be less or equal to 8"
613        );
614        let mut prefix_state = crate::Align16(sha256::IV);
615
616        let mut chunks = salt.chunks_exact(64);
617        for block in &mut chunks {
618            sha256::digest_block(
619                &mut prefix_state,
620                &core::array::from_fn(|i| {
621                    u32::from_be_bytes([
622                        block[i * 4],
623                        block[i * 4 + 1],
624                        block[i * 4 + 2],
625                        block[i * 4 + 3],
626                    ])
627                }),
628            );
629        }
630        let remainder = chunks.remainder();
631        let mut salt_residual = [0; 64];
632        salt_residual[..remainder.len()].copy_from_slice(remainder);
633        Self {
634            prefix_state,
635            salt_residual,
636            salt_residual_len: remainder.len(),
637            nonce_byte_count,
638            message_length: salt.len() + nonce_byte_count.get() as usize,
639        }
640    }
641}
642
643/// A message in the cerberus format
644///
645/// Note cerberus official solver only supports 32-bit range nonces, but the validator accepts machine sized nonces and should always remain inter-block
646pub enum CerberusMessage {
647    /// A decimal message
648    Decimal(CerberusDecimalMessage),
649    /// A binary message (0.4.6+)
650    Binary(CerberusBinaryMessage),
651}
652
653/// A message in the cerberus binary format (0.4.6+)
654///
655/// Construct: Proof := (prefix || U64(nonce))
656pub struct CerberusBinaryMessage {
657    pub(crate) midstate: Align16<[u32; 8]>,
658    pub(crate) first_word: u32,
659}
660
661impl CerberusBinaryMessage {
662    /// Create a new Cerberus message
663    pub fn new(salt: &[u8], first_word: u32) -> Self {
664        let prehash = ::blake3::hash(salt).to_hex();
665        Self::new_prehashed(prehash.as_bytes().try_into().unwrap(), first_word)
666    }
667
668    /// Create a new Cerberus message with a prehashed midstate
669    pub fn new_prehashed(prehash: &[u8; 64], first_word: u32) -> Self {
670        let mut block = [0; 16];
671        for i in 0..16 {
672            block[i] = u32::from_le_bytes([
673                prehash[i * 4],
674                prehash[i * 4 + 1],
675                prehash[i * 4 + 2],
676                prehash[i * 4 + 3],
677            ]);
678        }
679        let midstate = crate::blake3::compress8(
680            &crate::blake3::IV,
681            &block,
682            0,
683            64,
684            crate::blake3::FLAG_CHUNK_START,
685        );
686
687        Self {
688            midstate: Align16(midstate),
689            first_word,
690        }
691    }
692}
693
694#[derive(Debug, Clone)]
695/// A message in the cerberus decimal format (pre 0.4.6)
696///
697/// Construct: Proof := (prefix || ASCII_GO_INT_DECIMAL(nonce))
698pub struct CerberusDecimalMessage {
699    pub(crate) prefix_state: Align16<[u32; 8]>,
700    pub(crate) salt_residual: Align64<[u8; 64]>,
701    pub(crate) salt_residual_len: usize,
702    pub(crate) flags: u32,
703    pub(crate) nonce_addend: u64,
704}
705
706impl CerberusDecimalMessage {
707    /// Create a new Cerberus message
708    ///
709    /// End-to-end salt construction: `${challenge}|${inputNonce}|${ts}|${signature}|`
710    pub fn new(salt: &[u8], mut working_set: u32) -> Option<Self> {
711        // nonce placement in blake3 is less important than sha256, both early and late salts have strategies to elide computation.
712        //
713        // so we will keep it in 32-bit range just in case we met a 32-bit server, but in practice this is rarely seen.
714        //
715        // u32::MAX is 4294967295 (10 digits), we will pop the first digit as outer loop and at most 3 other blocks need to be mutated
716        // the last block is byte-order sensitive and needs a left shift to fix
717
718        // actual tree-based hashing is not supported yet, it kicks in at 1024 bytes
719        // we can leave 24 bytes of headroom for nonce maneuvering.
720        //
721        // it is also unlikely any challenge will hash such big chunks
722        if salt.len() > 1000 {
723            return None;
724        }
725        let mut chunks = salt.chunks_exact(64);
726        let mut prefix_state = crate::Align16(blake3::IV);
727        let mut flags = blake3::FLAG_CHUNK_START | blake3::FLAG_CHUNK_END | blake3::FLAG_ROOT;
728
729        for (i, block) in (&mut chunks).enumerate() {
730            let block = core::array::from_fn(|i| {
731                u32::from_le_bytes([
732                    block[i * 4],
733                    block[i * 4 + 1],
734                    block[i * 4 + 2],
735                    block[i * 4 + 3],
736                ])
737            });
738            let this_flag = if i == 0 { blake3::FLAG_CHUNK_START } else { 0 };
739
740            let output = blake3::compress8(&prefix_state, &block, 0, 64, this_flag);
741            prefix_state.copy_from_slice(&output);
742            flags &= !blake3::FLAG_CHUNK_START;
743        }
744        let remainder = chunks.remainder();
745        let mut salt_residual = crate::Align64([0; 64]);
746        salt_residual.0[..remainder.len()].copy_from_slice(remainder);
747        let mut ptr = remainder.len();
748
749        let mut nonce_addend = 0;
750        // TODO: figure out how to search more than 9G of nonce space for the edge case of 54 bytes modulo 64
751        // this is far from the typical case for Cerberus so not very important (even the official solver only searches 4G of nonce space)
752        if remainder.len() >= 55 {
753            // not enough room for 9 digits, assume 64-bit server and pad generously
754            let head_digit = (working_set % 8) as u8 + 1; // i64::MAX starts with 9 so we can only use 1-8 as head digit
755            nonce_addend = head_digit as u64;
756            salt_residual.0[remainder.len()] = head_digit as u8 + b'0';
757            working_set /= 8;
758            for x in (remainder.len() + 1)..64 {
759                let digit = working_set % 10;
760                salt_residual.0[x] = digit as u8 + b'0';
761                nonce_addend *= 10;
762                nonce_addend += digit as u64;
763                working_set /= 10;
764            }
765            ptr = 0;
766            let block = core::array::from_fn(|i| {
767                u32::from_le_bytes([
768                    salt_residual[i * 4],
769                    salt_residual[i * 4 + 1],
770                    salt_residual[i * 4 + 2],
771                    salt_residual[i * 4 + 3],
772                ])
773            });
774
775            let output = blake3::compress8(
776                &prefix_state,
777                &block,
778                0,
779                64,
780                blake3::FLAG_CHUNK_START & flags,
781            );
782            prefix_state.copy_from_slice(&output);
783            flags &= !blake3::FLAG_CHUNK_START;
784            salt_residual.fill(0);
785        }
786
787        let head_digit = (working_set % 9) as u8 + 1;
788        salt_residual[ptr] = head_digit as u8 + b'0';
789        nonce_addend *= 10;
790        nonce_addend += head_digit as u64;
791        working_set /= 9;
792        while working_set != 0 {
793            ptr += 1;
794            let digit = working_set % 10;
795            salt_residual[ptr] = digit as u8 + b'0';
796            nonce_addend *= 10;
797            nonce_addend += digit as u64;
798            working_set /= 10;
799        }
800
801        if ptr + 9 >= 64 {
802            return None;
803        }
804
805        ptr += 1;
806
807        for i in 0..9 {
808            salt_residual[ptr + i] = b'0';
809        }
810
811        Some(Self {
812            prefix_state,
813            salt_residual_len: ptr,
814            salt_residual,
815            flags,
816            nonce_addend: nonce_addend * 1_000_000_000,
817        })
818    }
819}
820
821/// A message in the go-away format
822///
823/// Construct: Proof := (prefix || U64(nonce)) where prefix is 32 bytes
824#[derive(Clone)]
825pub struct GoAwayMessage {
826    /// the challenge
827    pub(crate) challenge: [u32; 8],
828    pub(crate) high_word: u32,
829}
830
831impl GoAwayMessage {
832    /// creates a new go-away message
833    pub fn new(challenge: [u32; 8], high_word: u32) -> Self {
834        Self {
835            challenge,
836            high_word,
837        }
838    }
839
840    /// sets the high word of the message
841    pub fn set_high_word(&mut self, high_word: u32) {
842        self.high_word = high_word;
843    }
844
845    /// creates a new go-away message from a 32 byte challenge
846    pub fn new_bytes(challenge: &[u8; 32], high_word: u32) -> Self {
847        Self {
848            challenge: core::array::from_fn(|i| {
849                u32::from_be_bytes([
850                    challenge[i * 4],
851                    challenge[i * 4 + 1],
852                    challenge[i * 4 + 2],
853                    challenge[i * 4 + 3],
854                ])
855            }),
856            high_word,
857        }
858    }
859
860    /// creates a new go-away message from a 64 byte challenge
861    pub fn new_hex(challenge: &[u8; 64], high_word: u32) -> Option<Self> {
862        let mut prefix_fixed_up = [0; 32];
863        for i in 0..32 {
864            let byte_hex: [u8; 2] = challenge[i * 2..][..2].try_into().unwrap();
865            let high_nibble = if byte_hex[0].is_ascii_digit() {
866                byte_hex[0] - b'0'
867            } else if (b'a'..=b'f').contains(&byte_hex[0]) {
868                byte_hex[0] - b'a' + 10
869            } else {
870                return None;
871            };
872            let low_nibble = if byte_hex[1].is_ascii_digit() {
873                byte_hex[1] - b'0'
874            } else if (b'a'..=b'f').contains(&byte_hex[1]) {
875                byte_hex[1] - b'a' + 10
876            } else {
877                return None;
878            };
879            prefix_fixed_up[i] = (high_nibble << 4) | low_nibble;
880        }
881        Some(Self::new_bytes(&prefix_fixed_up, high_word))
882    }
883}
884
885/// A shared precomputed state for expanding CapJS batch challenges
886pub struct CapJSEmitter {
887    seed: u32,
888}
889
890#[inline(always)]
891const fn fnv1a(state: u32, data: u8) -> u32 {
892    let state = state ^ data as u32;
893    state
894        .wrapping_add(state << 1)
895        .wrapping_add(state << 4)
896        .wrapping_add(state << 7)
897        .wrapping_add(state << 8)
898        .wrapping_add(state << 24)
899}
900
901#[inline(always)]
902const fn capjs_lfsr(state: u32) -> u32 {
903    let state = state ^ (state << 13);
904    let state = state ^ (state >> 17);
905    state ^ (state << 5)
906}
907
908impl CapJSEmitter {
909    /// Create a new emitter for a given challenge token
910    pub const fn new(token: &[u8]) -> Self {
911        let mut i = 0;
912        let mut hash = 2166136261;
913        while i < token.len() {
914            hash = fnv1a(hash, token[i]);
915            i += 1;
916        }
917        Self { seed: hash }
918    }
919
920    /// Emit a salt and target for the i-th subgoal
921    ///
922    /// Note: Cap.js nonce index starts from 1
923    pub fn emit(&self, salt: &mut [u8], target: &mut [u32], i: u32) {
924        let mut digits = [0; 10];
925        let mut copy = i;
926        let mut j = 10;
927        loop {
928            j -= 1;
929            digits[j] = (copy % 10) as u8;
930            copy /= 10;
931            if copy == 0 {
932                break;
933            }
934        }
935        let itoa_buf = &digits[j..];
936        let mut salt_state = self.seed;
937        itoa_buf.iter().for_each(|d| {
938            salt_state = fnv1a(salt_state, *d + b'0');
939        });
940        let mut target_state = fnv1a(salt_state, b'd');
941        salt.chunks_mut(8).for_each(|d| {
942            salt_state = capjs_lfsr(salt_state);
943            let state_bytes = salt_state.to_be_bytes();
944            let mut buf = [0; 8];
945            for i in 0..4 {
946                let b = state_bytes[i];
947                let high_nibble = b >> 4;
948                let low_nibble = b & 0xf;
949                buf[i * 2] = if high_nibble < 10 {
950                    high_nibble + b'0'
951                } else {
952                    high_nibble + b'a' - 10
953                };
954                buf[i * 2 + 1] = if low_nibble < 10 {
955                    low_nibble + b'0'
956                } else {
957                    low_nibble + b'a' - 10
958                };
959            }
960            d.copy_from_slice(&buf[..d.len()]);
961        });
962        target.iter_mut().for_each(|d| {
963            target_state = capjs_lfsr(target_state);
964            *d = target_state;
965        });
966    }
967}
968
969#[cfg(test)]
970mod tests {
971    use super::*;
972
973    #[test]
974    fn test_double_block_addend_f64_safe() {
975        let salt = [b'a'; 64];
976        for len in 0..64 {
977            if let Some((single_solver, _fixup_prefix)) =
978                SingleBlockMessage::new_f64(&salt[..len], 0)
979            {
980                assert!(single_solver.nonce_addend <= 1_000_000_000_000_000);
981            } else if let Some(double_solver) = DoubleBlockMessage::new(&salt[..len], 0) {
982                assert!(double_solver.nonce_addend <= 1_000_000_000_000_000);
983            } else {
984                let single_solver = SingleBlockMessage::new(&salt[..len], 0);
985                let double_solver = DoubleBlockMessage::new(&salt[..len], 0);
986                panic!(
987                    "no messagger for length {}: (u64 single block: {:?}, u64 double block: {:?})",
988                    len,
989                    single_solver.is_some(),
990                    double_solver.is_some()
991                );
992            }
993        }
994    }
995
996    #[test]
997    fn test_fnv1a() {
998        let mut state = 2166136261;
999        let data = [240, 159, 166, 132, 240, 159, 140, 136];
1000        data.iter().for_each(|d| {
1001            state = fnv1a(state, *d);
1002        });
1003        assert_eq!(state, 2868248295);
1004    }
1005
1006    #[test]
1007    fn test_capjs_emitter() {
1008        const TOKEN: &[u8] = b"challenge token";
1009        const C: usize = 16;
1010        const S: usize = 30;
1011        const D: usize = 8;
1012        const KNOWN_ANSWER: [([u8; S], [u32; (D + 3) / 8]); C] = [
1013            (*b"0301c97a7ffc19cd647d324f84a2d9", [0xd4f0af52]),
1014            (*b"e2462bfec006f422ced5d703f2842e", [0xdf66e3e3]),
1015            (*b"c9dc0ae6589d3a6605e1344e4e745a", [0x425e36e1]),
1016            (*b"b47a610af330f9c76d0597a321c293", [0x3f743fab]),
1017            (*b"9c8bdae31d2ee808c5de8bff808d54", [0x0e401a5c]),
1018            (*b"488848fd736ecef6fcbb2e6eb7a2f8", [0xcd2ff29f]),
1019            (*b"6ff608b772f74727d86eafeebffe4d", [0x2f937817]),
1020            (*b"075dc1a5524efc3134dbc075d67d85", [0x0cbea335]),
1021            (*b"38f3cb0b73c3f382bbc51e3b184c35", [0xf4e72b04]),
1022            (*b"a01611f327205c07b8b5e0b790c35e", [0x29fe9e9b]),
1023            (*b"4097bda25357605354f6a05d9f5544", [0xcecadb61]),
1024            (*b"f62a5b07141651e21b65a0174385e3", [0x1b5bb71a]),
1025            (*b"8b10f44f26b28ee39ab47fd4bc905b", [0x4b836fae]),
1026            (*b"0f919a0ca6d088e44e4633428de8dd", [0x0fa70834]),
1027            (*b"ea9fd8f221b396723cc2064cfd7cb1", [0xd4038d0a]),
1028            (*b"5a94c8bcb3e606dd148e7723972538", [0x0a6a8e21]),
1029        ];
1030
1031        let emitter = CapJSEmitter::new(TOKEN);
1032        for (i, (salt, target)) in KNOWN_ANSWER.iter().enumerate() {
1033            let real_i = i + 1;
1034            let mut salt_out = [0; S];
1035            let mut target_out = [0; (D + 3) / 8];
1036            emitter.emit(&mut salt_out, &mut target_out, real_i as u32);
1037            assert_eq!(
1038                salt_out,
1039                salt.as_slice(),
1040                "salt is different at index {}",
1041                i
1042            );
1043            assert_eq!(target_out, *target);
1044        }
1045    }
1046}