Skip to main content

bnum/integer/radix/
from_radix.rs

1use crate::errors::ParseIntError;
2use crate::integer::radix::assert_range;
3use crate::{Byte, Integer, Uint};
4use core::num::IntErrorKind;
5
6#[inline]
7const fn byte_to_digit<const FROM_STR: bool>(byte: u8) -> u8 {
8    if FROM_STR {
9        match byte {
10            b'0'..=b'9' => byte - b'0',
11            b'a'..=b'z' => byte - b'a' + 10,
12            b'A'..=b'Z' => byte - b'A' + 10,
13            _ => u8::MAX,
14        }
15    } else {
16        byte
17    }
18}
19
20// iterates through the digits in the buffer, starting with the most significant digit
21// the BE parameter indicates whether the digits in the buffer are in big-endian order (i.e. whether to iterate from start or end of buffer)
22struct BufferDigitsIter<'a, const SKIP_UNDERSCORES: bool, const ASCII: bool, const BE: bool> {
23    buf: &'a [u8],
24    index: usize,
25}
26
27impl<'a, const SKIP_UNDERSCORES: bool, const ASCII: bool, const BE: bool>
28    BufferDigitsIter<'a, SKIP_UNDERSCORES, ASCII, BE>
29{
30    #[inline]
31    const fn new(buf: &'a [u8]) -> Self {
32        Self { buf, index: 0 }
33    }
34}
35
36impl<'a, const SKIP_UNDERSCORES: bool, const ASCII: bool, const BE: bool>
37    BufferDigitsIter<'a, SKIP_UNDERSCORES, ASCII, BE>
38{
39    #[inline]
40    const fn next(&mut self) -> Option<u8> {
41        while self.index < self.buf.len() {
42            let idx = if BE {
43                self.index
44            } else {
45                self.buf.len() - 1 - self.index
46            }; // we want to read most significant digits first. so if radix digits are given in big-endian, start from the index 0, otherwise start from the end
47            let b = self.buf[idx];
48            self.index += 1;
49            if SKIP_UNDERSCORES && b == b'_' {
50                continue;
51            }
52            return Some(byte_to_digit::<ASCII>(b));
53        }
54        None
55    }
56}
57
58macro_rules! impl_desc {
59    () => {
60        "Methods which convert to integers from strings and lists of digits in a given radix (base)."
61    };
62}
63
64#[doc = impl_desc!()]
65impl<const N: usize, const B: usize, const OM: u8> Uint<N, B, OM> {
66    const fn from_buf_radix_power_of_two<
67        const SKIP_UNDERSCORES: bool,
68        const ASCII: bool,
69        const BE: bool,
70        const EXACT: bool,
71    >(
72        buf: &[u8],
73        radix: u32,
74    ) -> Result<Self, ParseIntError> {
75        let radix_log2 = radix.ilog2() as usize;
76        let mut radix_digits = BufferDigitsIter::<SKIP_UNDERSCORES, ASCII, BE>::new(buf);
77
78        let mut out = Self::ZERO;
79        let mut length = buf.len();
80        let mut only_leading_zeros = true;
81        let mut overflow = false;
82        let mut bit_width = 0;
83        let mut i = 0;
84        while let Some(digit) = radix_digits.next() {
85            if digit >= radix as u8 {
86                // invalid digit
87                return Err(ParseIntError {
88                    kind: IntErrorKind::InvalidDigit,
89                });
90            }
91            if only_leading_zeros {
92                if digit == 0 {
93                    length -= 1;
94                    continue;
95                }
96                only_leading_zeros = false;
97                if SKIP_UNDERSCORES {
98                    let mut num_underscores = 0;
99                    let mut i = buf.len() - length;
100                    while i < buf.len() {
101                        if buf[i] == b'_' {
102                            num_underscores += 1;
103                        }
104                        i += 1;
105                    }
106                    length -= num_underscores;
107                }
108                if length == 0 {
109                    return Ok(Self::ZERO);
110                }
111                let target_bit_width = (length - 1) * radix_log2 + digit.ilog2() as usize + 1;
112                bit_width = digit.ilog2() as usize + 1;
113                overflow = target_bit_width > Self::BITS as usize;
114                i = length;
115            }
116
117            if !overflow {
118                i -= 1;
119                // we are setting bits from position i * radix_log2 to i * radix_log2 + radix_log2 - 1 (inclusive)
120                let byte_index = (i * radix_log2) / Byte::BITS as usize;
121
122                let bit_shift = (i * radix_log2) % Byte::BITS as usize;
123
124                if !EXACT && bit_shift != 0 && byte_index != N - 1 {
125                    // some of the bits of digit may have been shifted out
126                    // these bits are...
127                    let carry_bits = digit >> (Byte::BITS as usize - bit_shift);
128                    out.bytes[byte_index + 1] |= carry_bits;
129                    // if byte_index == N - 1, then carry_bits = 0, as we have already checked for overflow
130                }
131
132                out.bytes[byte_index] |= digit << bit_shift;
133            } else {
134                if !SKIP_UNDERSCORES && bit_width > Self::BITS as usize {
135                    
136                    return Err(ParseIntError {
137                        kind: IntErrorKind::PosOverflow,
138                    });
139                }
140                bit_width += radix_log2; // increment after the check, as we already initialised bit_width as the bit width of the first non-zero digit
141            }
142        }
143        if SKIP_UNDERSCORES && overflow { // we didn't return overflow error if parsing a literal, so return it now. guaranteed to have an overflow error if SKIP_UNDERSCORES is true
144            return Err(ParseIntError {
145                kind: IntErrorKind::PosOverflow,
146            });
147        }
148
149        Ok(out)
150    }
151
152    const fn from_buf_radix_non_power_of_two<
153        const SKIP_UNDERSCORES: bool,
154        const ASCII: bool,
155        const BE: bool,
156    >(
157        buf: &[u8],
158        radix: u32,
159    ) -> Result<Self, ParseIntError> {
160        let mut radix_digits = BufferDigitsIter::<SKIP_UNDERSCORES, ASCII, BE>::new(buf);
161
162        let mut out = Self::ZERO;
163        let mut overflow = false;
164
165        while let Some(digit) = radix_digits.next() {
166            if digit >= radix as u8 {
167                // invalid digit
168                return Err(ParseIntError {
169                    kind: IntErrorKind::InvalidDigit,
170                });
171            }
172            let (new_out, o) = out.mul_u128_digit(radix as u128);
173            overflow |= o;
174            if !SKIP_UNDERSCORES && overflow {
175                return Err(ParseIntError {
176                    kind: IntErrorKind::PosOverflow,
177                });
178            }
179            out = new_out;
180            match out.checked_add(Self::from_byte(digit)) { // checked_add is necessary here
181                Some(n) => out = n,
182                None => {
183                    if SKIP_UNDERSCORES {
184                        overflow = true; // delay returning overflow error when parsing literals as want to check if every digit is valid firstf
185                    } else {
186                        return Err(ParseIntError {
187                            kind: IntErrorKind::PosOverflow,
188                        });
189                    }
190                }
191            };
192        }
193        if SKIP_UNDERSCORES && overflow { // if we get to this stage (if SKIP_UNDERSCORES is true), then there is overflow and there were no invalid digits, so return overflow error
194            return Err(ParseIntError {
195                kind: IntErrorKind::PosOverflow,
196            });
197        }
198        Ok(out)
199    }
200
201    pub(crate) const fn from_buf_radix<const SKIP_UNDERSCORES: bool, const ASCII: bool, const BE: bool>(
202        buf: &[u8],
203        radix: u32,
204    ) -> Result<Self, ParseIntError> {
205        match radix {
206            2 | 4 | 16 | 256 => {
207                Self::from_buf_radix_power_of_two::<SKIP_UNDERSCORES, ASCII, BE, true>(buf, radix)
208            }
209            8 | 32 | 64 | 128 => {
210                Self::from_buf_radix_power_of_two::<SKIP_UNDERSCORES, ASCII, BE, false>(buf, radix)
211            }
212            _ => Self::from_buf_radix_non_power_of_two::<SKIP_UNDERSCORES, ASCII, BE>(buf, radix),
213        }
214    }
215
216    /// Converts a slice of big-endian digits in the given radix to an integer. Each `u8` of the slice is interpreted as one digit of base `radix` of the number, so this function will return `None` if any digit is greater than or equal to `radix`, or if the integer represented by the digits is too large to be represented by `Self`. Otherwise, the integer is wrapped in `Some`.
217    ///
218    /// # Panics
219    ///
220    /// This function panics if `radix` is not in the range from 2 to 256 inclusive.
221    ///
222    /// # Examples
223    ///
224    /// ```
225    /// use bnum::prelude::*;
226    /// use bnum::types::U512;
227    ///
228    /// let a = U512::from_radix_be(&[4, 3, 2, 1], 11).unwrap();
229    /// let b: U512 = n!(1)*n!(11).pow(0) + n!(2)*n!(11).pow(1) + n!(3)*n!(11).pow(2) + n!(4)*n!(11).pow(3);
230    ///
231    /// assert_eq!(a, b); // 4*11^3 + 3*11^2 + 2*11^1 + 1*11^0
232    /// ```
233    #[inline]
234    pub const fn from_radix_be(buf: &[u8], radix: u32) -> Option<Self> {
235        assert_range!(radix, 256);
236        if buf.is_empty() {
237            return Some(Self::ZERO);
238        }
239        if radix == 256 {
240            return Self::from_be_slice(buf);
241        }
242
243        crate::helpers::ok!(Self::from_buf_radix::<false, false, true>(
244            buf, radix
245        ))
246    }
247
248    /// Converts a slice of little-endian digits in the given radix to an integer. Each `u8` of the slice is interpreted as one digit of base `radix` of the number, so this function will return `None` if any digit is greater than or equal to `radix`, or if the integer represented by the digits is too large to be represented by `Self`. Otherwise, the integer is wrapped in `Some`.
249    ///
250    /// # Panics
251    ///
252    /// This function panics if `radix` is not in the range from 2 to 256 inclusive.
253    ///
254    /// # Examples
255    ///
256    /// ```
257    /// use bnum::prelude::*;
258    /// use bnum::types::U512;
259    ///
260    /// let a = U512::from_radix_le(&[5, 6, 7, 8], 18).unwrap();
261    /// let b: U512 = n!(5)*n!(18).pow(0) + n!(6)*n!(18).pow(1) + n!(7)*n!(18).pow(2) + n!(8)*n!(18).pow(3);
262    ///
263    /// assert_eq!(a, b); // 8*18^3 + 7*18^2 + 6*18^1 + 5*18^0
264    /// ```
265    #[inline]
266    pub const fn from_radix_le(buf: &[u8], radix: u32) -> Option<Self> {
267        assert_range!(radix, 256);
268        if buf.is_empty() {
269            return Some(Self::ZERO);
270        }
271        if radix == 256 {
272            return Self::from_le_slice(buf);
273        }
274
275        crate::helpers::ok!(Self::from_buf_radix::<false, false, false>(
276            buf, radix
277        ))
278    }
279}
280
281impl<const S: bool, const N: usize, const B: usize, const OM: u8> Integer<S, N, B, OM> {
282    /// Converts a string slice in a given base to an integer.
283    ///
284    /// The string is expected to be an optional `+` (or `-` if the integer is signed) sign followed by digits. Leading and trailing whitespace represent an error. Underscores (which are accepted in Rust literals) also represent an error.
285    ///
286    /// Digits are a subset of these characters, depending on `radix`:
287    ///
288    /// - `0-9`
289    /// - `a-z`
290    /// - `A-Z`
291    ///
292    /// # Panics
293    ///
294    /// This function panics if `radix` is not in the range from 2 to 36 inclusive.
295    ///
296    /// # Examples
297    ///
298    /// Basic usage:
299    ///
300    /// ```
301    /// use bnum::prelude::*;
302    /// use bnum::types::{U512, I512};
303    ///
304    /// assert_eq!(U512::from_str_radix("A", 16), Ok(n!(10)));
305    /// assert_eq!(I512::from_str_radix("-B", 16), Ok(n!(-11)));
306    /// ```
307    #[inline]
308    pub const fn from_str_radix(src: &str, radix: u32) -> Result<Self, ParseIntError> {
309        Self::from_ascii_radix(src.as_bytes(), radix)
310    }
311
312    /// Parses an integer from an ASCII-byte slice with decimal digits.
313    ///
314    /// The characters are expected to be an optional `+` (or `-` if the integer is signed) sign followed by only digits. Leading and trailing non-digit characters (including whitespace) represent an error. Underscores (which are accepted in Rust literals) also represent an error.
315    ///
316    /// # Examples
317    ///
318    /// Basic usage:
319    ///
320    /// ```
321    /// use bnum::prelude::*;
322    /// use bnum::types::{U512, I512};
323    ///
324    /// assert_eq!(U512::from_ascii(b"+10"), Ok(n!(10U512)));
325    /// assert_eq!(I512::from_ascii(b"-1234"), Ok(n!(-1234I512)));
326    /// ```
327    #[inline]
328    pub const fn from_ascii(src: &[u8]) -> Result<Self, ParseIntError> {
329        Self::from_ascii_radix(src, 10)
330    }
331
332    /// Parses an integer from an ASCII-byte slice with digits in a given base.
333    ///
334    /// The characters are expected to be an optional `+` sign followed by only digits. Leading and trailing non-digit characters (including whitespace) represent an error. Underscores (which are accepted in Rust literals) also represent an error.
335    ///
336    /// Digits are a subset of these characters, depending on radix:
337    ///
338    /// - `0-9`
339    /// - `a-z`
340    /// - `A-Z`
341    ///
342    /// # Panics
343    ///
344    /// This function panics if radix is not in the range from 2 to 36 inclusive.
345    ///
346    /// # Examples
347    ///
348    /// Basic usage:
349    ///
350    /// ```
351    /// use bnum::prelude::*;
352    /// use bnum::types::{U512, I512};
353    ///
354    /// assert_eq!(U512::from_ascii_radix(b"A", 16), Ok(n!(10)));
355    /// assert_eq!(I512::from_ascii_radix(b"-C", 16), Ok(n!(-12)));
356    /// ```
357    #[inline]
358    pub const fn from_ascii_radix(src: &[u8], radix: u32) -> Result<Self, ParseIntError> {
359        assert_range!(radix, 36);
360        if src.is_empty() {
361            return Err(ParseIntError {
362                kind: IntErrorKind::Empty,
363            });
364        }
365        if src.len() == 1 && (src[0] == b'+' || (S && src[0] == b'-')) {
366            return Err(ParseIntError {
367                kind: IntErrorKind::InvalidDigit,
368            });
369        }
370
371        let (src, negative) = if S && src[0] == b'-' {
372            (src.split_at(1).1, true)
373        } else if src[0] == b'+' {
374            (src.split_at(1).1, false)
375        } else {
376            (src, false)
377        };
378        match Uint::from_buf_radix::<false, true, true>(src, radix) {
379            Ok(uint) => {
380                let out = uint.force_sign::<S>();
381                if S && negative {
382                    // no error iff out is positive or out is Self::MIN, i.e. ...
383                    if uint.gt(&Self::MIN.force_sign()) {
384                        Err(ParseIntError {
385                            kind: IntErrorKind::NegOverflow,
386                        })
387                    } else {
388                        Ok(out.wrapping_neg()) // needs to be wrapping_neg as we need to handle the Self::MIN case (Self::MIN is mapped to Self:MIN)
389                    }
390                } else {
391                    if out.is_negative_internal() {
392                        Err(ParseIntError {
393                            kind: IntErrorKind::PosOverflow,
394                        })
395                    } else {
396                        Ok(out)
397                    }
398                }
399            }
400            Err(err) => match err.kind() {
401                IntErrorKind::PosOverflow if S && negative => Err(ParseIntError {
402                    kind: IntErrorKind::NegOverflow,
403                }),
404                _ => Err(err),
405            },
406        }
407    }
408}
409
410#[cfg(test)]
411mod tests {
412    use crate::test::test_bignum;
413    use core::num::IntErrorKind;
414    use core::str::FromStr;
415
416    crate::test::test_all! {
417        testing integers;
418
419        test_bignum! {
420            function: <stest>::from_str,
421            cases: [
422                ("398475394875230495745"),
423                ("3984753948752304957423490785029749572977970985"),
424                ("12345💩👍"),
425                ("1234567890a"),
426                (""),
427                ("+10"),
428                ("-10"),
429                ("1234567890"),
430                ("-1234567890"),
431                ("+1234567890"),
432                ("-12345678901234567890"),
433                ("+12345678901234567890"),
434                ("-9223372036854775808"),
435                ("+9223372036854775807")
436            ]
437        }
438
439        #[cfg(nightly)] // since int_from_ascii not stable yet
440        test_bignum! {
441            function: <stest>::from_ascii,
442            cases: [
443                ("11111111".as_bytes()),
444                ("10000000000000000000000000000000000".as_bytes()),
445                ("12💩👍45".as_bytes()),
446                ("b1234567890a".as_bytes()),
447                ("".as_bytes()),
448                ("+10".as_bytes()),
449                ("-10".as_bytes()),
450                ("1234567890".as_bytes()),
451                ("-1234567890".as_bytes()),
452                ("+1234567890".as_bytes()),
453                ("-12345678901234567890".as_bytes()),
454                ("+12345678901234567890".as_bytes()),
455                ("-9223372036854775808".as_bytes()),
456                ("+9223372036854775807".as_bytes()),
457                ("0".as_bytes())
458            ]
459        }
460
461        #[cfg(nightly)] // since int_from_ascii not stable yet
462        test_bignum! {
463            function: <stest>::from_ascii_radix,
464            cases: [
465                ("+af7345asdofiuweor".as_bytes(), 35u32),
466                ("+945hhdgi73945hjdfj".as_bytes(), 32u32),
467                ("+3436847561345343455".as_bytes(), 9u32),
468                ("+affe758457bc345540ac399".as_bytes(), 16u32),
469                ("+affe758457bc345540ac39929334534ee34579234795".as_bytes(), 17u32),
470                ("+3777777777777777777777777777777777777777777".as_bytes(), 8u32),
471                ("+37777777777777777777777777777777777777777761".as_bytes(), 8u32),
472                ("+1777777777777777777777".as_bytes(), 8u32),
473                ("+17777777777777777777773".as_bytes(), 8u32),
474                ("+2000000000000000000000".as_bytes(), 8u32),
475                ("-234598734".as_bytes(), 10u32),
476                ("g234ab".as_bytes(), 16u32),
477                ("234£$2234".as_bytes(), 15u32),
478                ("123456💯".as_bytes(), 30u32),
479                ("3434💯34593487".as_bytes(), 12u32),
480                ("💯34593487".as_bytes(), 11u32),
481                ("abcdefw".as_bytes(), 32u32),
482                ("1234ab".as_bytes(), 11u32),
483                ("1234".as_bytes(), 4u32),
484                ("010120101".as_bytes(), 2u32),
485                ("10000000000000000".as_bytes(), 16u32),
486                ("p8hrbe0mo0084i6vckj1tk7uvacnn4cm".as_bytes(), 32u32),
487                ("".as_bytes(), 10u32),
488                ("-14359abcasdhfkdgdfgsde".as_bytes(), 34u32),
489                ("+23797984569ahgkhhjdskjdfiu".as_bytes(), 32u32),
490                ("-253613132341435345".as_bytes(), 7u32),
491                ("+23467abcad47790809ef37".as_bytes(), 16u32),
492                ("-712930769245766867875986646".as_bytes(), 10u32),
493                ("-😱234292".as_bytes(), 36u32),
494                ("-+345934758".as_bytes(), 13u32),
495                ("12💯12".as_bytes(), 15u32),
496                ("gap gap".as_bytes(), 36u32),
497                ("-9223372036854775809".as_bytes(), 10u32),
498                ("-1000000000000000000001".as_bytes(), 8u32),
499                ("+1000000000000000000001".as_bytes(), 8u32),
500                ("-8000000000000001".as_bytes(), 16u32),
501                ("+-23459374".as_bytes(), 15u32),
502                ("8000000000000000".as_bytes(), 16u32),
503                ("".as_bytes(), 10u32)
504            ]
505        }
506
507        test_bignum! {
508            function: <stest>::from_str_radix,
509            cases: [
510                ("+af7345asdofiuweor", 35u32),
511                ("+945hhdgi73945hjdfj", 32u32),
512                ("+3436847561345343455", 9u32),
513                ("+affe758457bc345540ac399", 16u32),
514                ("+affe758457bc345540ac39929334534ee34579234795", 17u32),
515                ("+3777777777777777777777777777777777777777777", 8u32),
516                ("+37777777777777777777777777777777777777777761", 8u32),
517                ("+1777777777777777777777", 8u32),
518                ("+17777777777777777777773", 8u32),
519                ("+2000000000000000000000", 8u32),
520                ("-234598734", 10u32),
521                ("234ab", 16u32),
522                ("g234ab", 16u32),
523                ("234£$2234", 15u32),
524                ("123456💯", 30u32),
525                ("3434💯34593487", 12u32),
526                ("💯34593487", 11u32),
527                ("abcdefw", 32u32),
528                ("1234ab", 11u32),
529                ("1234", 4u32),
530                ("010120101", 2u32),
531                ("10000000000000000", 16u32),
532                ("p8hrbe0mo0084i6vckj1tk7uvacnn4cm", 32u32),
533                ("", 10u32),
534                ("-14359abcasdhfkdgdfgsde", 34u32),
535                ("+23797984569ahgkhhjdskjdfiu", 32u32),
536                ("-253613132341435345", 7u32),
537                ("+23467abcad47790809ef37", 16u32),
538                ("-712930769245766867875986646", 10u32),
539                ("-😱234292", 36u32),
540                ("-+345934758", 13u32),
541                ("12💯12", 15u32),
542                ("gap gap", 36u32),
543                ("-9223372036854775809", 10u32),
544                ("-1000000000000000000001", 8u32),
545                ("+1000000000000000000001", 8u32),
546                ("-8000000000000001", 16u32),
547                ("+-23459374", 15u32),
548                ("8000000000000000", 16u32),
549                ("", 10u32)
550            ]
551        }
552
553        #[cfg(feature = "alloc")]
554        crate::test::quickcheck_from_str!(stest);
555
556        #[test]
557        fn from_str_radix_empty() {
558            let _ = STEST::from_str_radix("", 10).unwrap_err().kind() == &IntErrorKind::Empty;
559        }
560
561        #[test]
562        fn from_str_radix_invalid_char() {
563            let _ = STEST::from_str_radix("a", 10).unwrap_err().kind() == &IntErrorKind::InvalidDigit;
564        }
565
566        #[test]
567        #[should_panic(expected = "Radix must be in range [2, 36]")]
568        fn from_str_radix_invalid_radix() {
569            let _ = STEST::from_str_radix("1234", 37).unwrap();
570        }
571    }
572
573    crate::test::test_all! {
574        testing unsigned;
575
576        #[cfg(feature = "alloc")]
577        crate::test::quickcheck_from_str_radix!(utest, "+" | "");
578
579        #[test]
580        #[should_panic(expected = "Radix must be in range [2, 256]")]
581        fn from_radix_be_invalid_radix() {
582            let _ = UTEST::from_radix_be(&[1], 257);
583        }
584
585        #[test]
586        #[should_panic(expected = "Radix must be in range [2, 256]")]
587        fn from_radix_le_invalid_radix() {
588            let _ = UTEST::from_radix_le(&[1], 257);
589        }
590
591        #[test]
592        fn parse_empty() {
593            assert_eq!(UTEST::from_radix_be(&[], 10), Some(UTEST::ZERO));
594            assert_eq!(UTEST::from_radix_le(&[], 10), Some(UTEST::ZERO));
595        }
596    }
597
598    crate::test::test_all! {
599        testing signed;
600
601        #[cfg(feature = "alloc")]
602        crate::test::quickcheck_from_str_radix!(itest, "+" | "-");
603    }
604
605    // TODO: custom bit width tests for from_str_radix
606}