dashu_int/parse/
non_power_two.rs

1//! Parse in a non-power-of-two radix.
2
3use crate::{
4    arch::word::Word,
5    buffer::Buffer,
6    mul,
7    radix::{self, Digit},
8    repr::Repr,
9    ubig::UBig,
10};
11use alloc::vec;
12use dashu_base::ParseError;
13
14/// Parse in chunks of CHUNK_LEN * digits_per_word.
15const CHUNK_LEN: usize = 256;
16
17/// Parse an unsigned string to [UBig].
18pub fn parse(src: &str, radix: Digit) -> Result<UBig, ParseError> {
19    debug_assert!(radix::is_radix_valid(radix) && !radix.is_power_of_two());
20    let radix_info = radix::radix_info(radix);
21    let mut bytes = src.as_bytes();
22    let stripped: vec::Vec<u8>;
23
24    // strip all underscores if detected
25    if bytes.contains(&b'_') {
26        stripped = bytes.iter().copied().filter(|&c| c != b'_').collect();
27        bytes = &stripped;
28    }
29
30    if bytes.len() <= radix_info.digits_per_word {
31        Ok(parse_word(bytes, radix)?.into())
32    } else if bytes.len() <= CHUNK_LEN * radix_info.digits_per_word {
33        parse_chunk(bytes, radix)
34    } else {
35        parse_large(bytes, radix)
36    }
37}
38
39/// Parse an unsigned string to `Word`.
40///
41/// The length of the string must be at most `digits_per_word`.
42fn parse_word(src: &[u8], radix: Digit) -> Result<Word, ParseError> {
43    debug_assert!(radix::is_radix_valid(radix) && !radix.is_power_of_two());
44    debug_assert!(src.len() <= radix::radix_info(radix).digits_per_word);
45
46    let mut word: Word = 0;
47    for byte in src.iter() {
48        let digit = radix::digit_from_ascii_byte(*byte, radix).ok_or(ParseError::InvalidDigit)?;
49        word = word * (radix as Word) + (digit as Word);
50    }
51    Ok(word)
52}
53
54/// Parse an unsigned string to [UBig].
55///
56/// The length of input is limited to `CHUNK_LEN * digits_per_word`.
57fn parse_chunk(bytes: &[u8], radix: Digit) -> Result<UBig, ParseError> {
58    debug_assert!(radix::is_radix_valid(radix) && !radix.is_power_of_two());
59    let radix_info = radix::radix_info(radix);
60    debug_assert!(bytes.len() <= CHUNK_LEN * radix_info.digits_per_word);
61
62    let groups = bytes.rchunks(radix_info.digits_per_word);
63    let mut buffer = Buffer::allocate(groups.len());
64    for group in groups.rev() {
65        let next = parse_word(group, radix)?;
66        let carry = mul::mul_word_in_place_with_carry(&mut buffer, radix_info.range_per_word, next);
67        if carry != 0 {
68            buffer.push(carry);
69        }
70    }
71    Ok(UBig(Repr::from_buffer(buffer)))
72}
73
74/// Parse an unsigned string to [UBig].
75///
76/// This result will usually not fit in CHUNK_LEN words.
77fn parse_large(bytes: &[u8], radix: Digit) -> Result<UBig, ParseError> {
78    debug_assert!(radix::is_radix_valid(radix) && !radix.is_power_of_two());
79    let radix_info = radix::radix_info(radix);
80    let chunk_bytes = CHUNK_LEN * radix_info.digits_per_word;
81    debug_assert!(bytes.len() > chunk_bytes);
82
83    // Calculate radix^(CHUNK_LEN<<i).
84    let mut radix_powers = vec![UBig::from(radix_info.range_per_word).pow(CHUNK_LEN)];
85
86    // while (chunk_bytes << radix_powers.len()) < bytes.len()
87    // To avoid overflow:
88    while chunk_bytes <= (bytes.len() - 1) >> radix_powers.len() {
89        let prev = radix_powers.last().unwrap();
90        let new = prev * prev;
91        radix_powers.push(new);
92    }
93
94    parse_large_divide_conquer(bytes, radix, chunk_bytes, &radix_powers)
95}
96
97/// Convert an unsigned string to [UBig].
98///
99/// `radix_powers` contains radix^n for n = chunk digits << i
100fn parse_large_divide_conquer(
101    bytes: &[u8],
102    radix: Digit,
103    chunk_bytes: usize,
104    radix_powers: &[UBig],
105) -> Result<UBig, ParseError> {
106    debug_assert!(bytes.len() <= chunk_bytes << radix_powers.len());
107
108    match radix_powers.split_last() {
109        None => parse_chunk(bytes, radix),
110        Some((radix_power, radix_powers)) => {
111            let bytes_lo_len = chunk_bytes << radix_powers.len();
112            if bytes.len() <= bytes_lo_len {
113                parse_large_divide_conquer(bytes, radix, chunk_bytes, radix_powers)
114            } else {
115                let (bytes_hi, bytes_lo) = bytes.split_at(bytes.len() - bytes_lo_len);
116                let res_hi =
117                    parse_large_divide_conquer(bytes_hi, radix, chunk_bytes, radix_powers)?;
118                let res_lo =
119                    parse_large_divide_conquer(bytes_lo, radix, chunk_bytes, radix_powers)?;
120                Ok(res_hi * radix_power + res_lo)
121            }
122        }
123    }
124}