fpdec_core/
parser.rs

1// ---------------------------------------------------------------------------
2// Copyright:   (c) 2021 ff. Michael Amrhein (michael@adrhinum.de)
3// License:     This program is part of a larger application. For license
4//              details please read the file LICENSE.TXT provided together
5//              with the application.
6// ---------------------------------------------------------------------------
7// $Source: fpdec-core/src/parser.rs $
8// $Revision: 2025-11-28T21:00:03+01:00 $
9
10use core::{
11    fmt::{Display, Formatter},
12    ptr,
13};
14
/// The ways in which parsing a decimal literal can fail.
///
/// This is the error type of the `FromStr` implementation of `Decimal`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseDecimalError {
    /// The literal is the empty string.
    Empty,
    /// The literal is not a syntactically valid decimal number.
    Invalid,
    /// The literal has more fractional digits than `MAX_N_FRAC_DIGITS`
    /// allows.
    FracDigitLimitExceeded,
    /// The value of the literal does not fit into the internal
    /// representation of `Decimal`.
    InternalOverflow,
}

impl ParseDecimalError {
    /// Short, human readable description of the error; this is the text
    /// shown by the `Display` implementation.
    #[doc(hidden)]
    #[must_use]
    pub const fn _description(&self) -> &str {
        match *self {
            Self::InternalOverflow => "Internal representation exceeded.",
            Self::FracDigitLimitExceeded => "Too many fractional digits.",
            Self::Invalid => "Invalid decimal string literal.",
            Self::Empty => "Empty string.",
        }
    }
}

impl Display for ParseDecimalError {
    /// Write the description, honouring width / precision / alignment flags
    /// of the formatter.
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        f.pad(self._description())
    }
}

#[cfg(feature = "std")]
impl std::error::Error for ParseDecimalError {}
54
/// Tell whether all 8 bytes packed into `chunk` are ASCII decimal digits
/// (`b'0'..=b'9'`).
const fn chunk_contains_8_digits(chunk: u64) -> bool {
    // b'0' replicated into every byte.
    const ZEROES: u64 = 0x3030_3030_3030_3030;
    // 0x46 (= 0x7f - b'9') replicated into every byte.
    const HEADROOM: u64 = 0x4646_4646_4646_4646;
    // The highest bit of every byte.
    const HIGH_BITS: u64 = 0x8080_8080_8080_8080;

    // Subtracting b'0' sets the highest bit of every byte that was < b'0'.
    let below = chunk.wrapping_sub(ZEROES);
    // Adding 0x46 pushes every byte that was > b'9' beyond 0x7f, i.e. it sets
    // the highest bit of that byte as well.
    let above = chunk.wrapping_add(HEADROOM);
    // Hence a byte of `below | above` keeps its highest bit clear only if the
    // original byte was a digit; masking with 0x80 per byte yields 0 for
    // digits and 0x80 for everything else.
    ((below | above) & HIGH_BITS) == 0
}
68
/// Convert 8 ASCII decimal digits packed into `chunk` into their numeric
/// value.
///
/// The digit in the lowest byte of `chunk` is the most significant one (as
/// produced by a little-endian read of the text). The result is only
/// meaningful if all 8 bytes are digits.
const fn chunk_to_u64(chunk: u64) -> u64 {
    // Adopted from Johnny Lee: Fast numeric string to int
    // [https://johnnylee-sde.github.io/Fast-numeric-string-to-int].
    const NIBBLES: u64 = 0x0f0f_0f0f_0f0f_0f0f;
    const PAIRS: u64 = 0x000f_000f_000f_000f;
    const QUADS: u64 = 0x0000_007f_0000_007f;
    const HALVES: u64 = 0x3fff;

    // Strip the ASCII offset: every byte now holds a value 0..=9.
    let digits = chunk & NIBBLES;
    // Merge neighbouring digits: every 16-bit lane holds a value 0..=99.
    let pairs = (digits & PAIRS)
        .wrapping_mul(10)
        .wrapping_add((digits >> 8) & PAIRS);
    // Merge neighbouring pairs: every 32-bit lane holds a value 0..=9999.
    let quads = (pairs & QUADS)
        .wrapping_mul(100)
        .wrapping_add((pairs >> 16) & QUADS);
    // Merge both halves into the final 8-digit value.
    (quads & HALVES)
        .wrapping_mul(10_000)
        .wrapping_add((quads >> 32) & HALVES)
}
84
85// Bytes wrapper specialized for parsing decimal number literals
86struct AsciiDecLit<'a> {
87    bytes: &'a [u8],
88}
89
90#[allow(unsafe_code)]
91impl<'a> AsciiDecLit<'a> {
92    const fn new(bytes: &'a [u8]) -> Self {
93        Self { bytes }
94    }
95
96    const fn is_empty(&self) -> bool {
97        self.bytes.is_empty()
98    }
99
100    const fn len(&self) -> usize {
101        self.bytes.len()
102    }
103
104    /// self <- self[n..]
105    unsafe fn skip_n(&mut self, n: usize) -> &mut Self {
106        debug_assert!(self.bytes.len() >= n);
107        self.bytes = self.bytes.get_unchecked(n..);
108        self
109    }
110
111    /// self <- self[n..]
112    unsafe fn skip_1(&mut self) -> &mut Self {
113        self.skip_n(1)
114    }
115
116    const fn first(&self) -> Option<&u8> {
117        self.bytes.first()
118    }
119
120    fn first_eq(&self, b: u8) -> bool {
121        Some(&b) == self.first()
122    }
123
124    #[allow(dead_code)]
125    const fn first_is_digit(&self) -> bool {
126        matches!(self.first(), Some(c) if c.wrapping_sub(b'0') < 10)
127    }
128
129    fn skip_leading_zeroes(&mut self) -> &mut Self {
130        while self.first_eq(b'0') {
131            // Safety: safe because of condition above!
132            unsafe {
133                self.skip_1();
134            };
135        }
136        self
137    }
138
139    // Read 8 bytes as u64 (little-endian).
140    unsafe fn read_u64_unchecked(&self) -> u64 {
141        debug_assert!(self.bytes.len() >= 8);
142        let src = self.bytes.as_ptr() as *const u64;
143        u64::from_le(ptr::read_unaligned(src))
144    }
145
146    // Try to read the next 8 bytes from self.
147    fn read_u64(&self) -> Option<u64> {
148        (self.len() >= 8).then(||
149            // Safety: safe because of condition above!
150            unsafe { self.read_u64_unchecked() })
151    }
152
153    /// Convert the leading sequence of decimal digits in `self` (if any) into
154    /// an int and accumulate it into `coeff`.
155    // The function uses wrapping_mul and wrapping_add, so overflow can
156    // happen; it must be checked later!
157    fn accum_coeff(&mut self, coeff: &mut u128) -> usize {
158        let start_len = self.len();
159        // First, try chunks of 8 digits
160        while let Some(k) = self.read_u64() {
161            if chunk_contains_8_digits(k) {
162                *coeff = coeff
163                    .wrapping_mul(100000000)
164                    .wrapping_add(chunk_to_u64(k) as u128);
165                // Safety: safe because of call to self.read_u64 above
166                unsafe {
167                    self.skip_n(8);
168                }
169            } else {
170                break;
171            }
172        }
173        // Handle remaining digits
174        while let Some(c) = self.first() {
175            let d = c.wrapping_sub(b'0');
176            if d < 10 {
177                *coeff = coeff.wrapping_mul(10).wrapping_add(d as u128);
178                // Safety: safe because of call to self.first above
179                unsafe {
180                    self.skip_1();
181                }
182            } else {
183                break;
184            }
185        }
186        start_len - self.len()
187    }
188
189    /// Convert the leading sequence of decimal digits in `self` (if any) into
190    /// an int and accumulate it into `exp`.
191    // The function uses wrapping_mul and wrapping_add, but overflow is
192    // prevented by limiting the result to a value which will cause an error
193    // later!
194    fn accum_exp(&mut self, exp: &mut isize) -> usize {
195        let start_len = self.len();
196        while let Some(c) = self.first() {
197            let d = c.wrapping_sub(b'0');
198            if d < 10 {
199                if *exp < 0x1000000 {
200                    *exp = exp.wrapping_mul(10).wrapping_add(d as isize);
201                }
202                // Safety: safe because of call to self.first above
203                unsafe {
204                    self.skip_1();
205                }
206            } else {
207                break;
208            }
209        }
210        start_len - self.len()
211    }
212}
213
214/// Convert a decimal number literal into a representation in the form
215/// (coefficient, exponent), so that number == coefficient * 10 ^ exponent.
216///
217/// The literal must be in the form
218///
219/// `[+|-]<int>[.<frac>][<e|E>[+|-]<exp>]`
220///
221/// or
222///
223/// `[+|-].<frac>[<e|E>[+|-]<exp>]`.
224#[doc(hidden)]
225#[allow(clippy::cast_possible_wrap)]
226#[allow(unsafe_code)]
227pub fn str_to_dec(lit: &str) -> Result<(i128, isize), ParseDecimalError> {
228    let mut lit = AsciiDecLit::new(lit.as_ref());
229    let is_negative = match lit.first() {
230        None => {
231            return Err(ParseDecimalError::Empty);
232        }
233        Some(b'-') => {
234            // Safety: safe because of match
235            unsafe { lit.skip_1() };
236            true
237        }
238        Some(b'+') => {
239            // Safety: safe because of match
240            unsafe { lit.skip_1() };
241            false
242        }
243        _ => false,
244    };
245    if lit.is_empty() {
246        return Err(ParseDecimalError::Invalid);
247    }
248    lit.skip_leading_zeroes();
249    if lit.is_empty() {
250        // There must have been atleast one zero. Ignore sign.
251        return Ok((0, 0));
252    }
253    let mut coeff = 0_u128;
254    // Parse integral digits.
255    let n_int_digits = lit.accum_coeff(&mut coeff);
256    // Check for radix point and parse fractional digits.
257    let mut n_frac_digits = 0_usize;
258    if let Some(c) = lit.first() {
259        if *c == b'.' {
260            // Safety: safe because of condition above
261            unsafe { lit.skip_1() };
262            n_frac_digits = lit.accum_coeff(&mut coeff);
263        }
264    }
265    let n_digits = n_int_digits + n_frac_digits;
266    if n_digits == 0 {
267        return Err(ParseDecimalError::Invalid);
268    }
269    // check for overflow
270    // 1. 10^e > i128::MAX for e > 39
271    // 2. e = 39 && coeff < 10³⁸ (overflow occured during accumulation)
272    // 3. coeff > i128::MAX
273    if n_digits > 39
274        || n_digits == 39
275            && coeff < 100000000000000000000000000000000000000_u128
276        || coeff > i128::MAX as u128
277    {
278        return Err(ParseDecimalError::InternalOverflow);
279    }
280    let mut exp = 0_isize;
281    // check for explicit exponent
282    if let Some(c) = lit.first() {
283        if *c == b'e' || *c == b'E' {
284            // Safety: safe because of condition above
285            unsafe { lit.skip_1() };
286            let exp_is_negative = match lit.first() {
287                None => {
288                    return Err(ParseDecimalError::Invalid);
289                }
290                Some(b'-') => {
291                    // Safety: safe because of match
292                    unsafe { lit.skip_1() };
293                    true
294                }
295                Some(b'+') => {
296                    // Safety: safe because of match
297                    unsafe { lit.skip_1() };
298                    false
299                }
300                _ => false,
301            };
302            let n_exp_digits = lit.accum_exp(&mut exp);
303            if exp_is_negative {
304                exp = -exp;
305            }
306            if n_exp_digits > 2 {
307                return Err(ParseDecimalError::FracDigitLimitExceeded);
308            }
309        } else {
310            return Err(ParseDecimalError::Invalid);
311        }
312    }
313    if !lit.is_empty() {
314        return Err(ParseDecimalError::Invalid);
315    }
316    exp -= n_frac_digits as isize;
317    if -exp > crate::MAX_N_FRAC_DIGITS as isize {
318        return Err(ParseDecimalError::FracDigitLimitExceeded);
319    }
320    if is_negative {
321        Ok((-(coeff as i128), exp))
322    } else {
323        Ok((coeff as i128, exp))
324    }
325}
326
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `lit` is parsed successfully, giving `expected`.
    fn assert_parsed(lit: &str, expected: (i128, isize)) {
        assert_eq!(str_to_dec(lit), Ok(expected), "literal: {:?}", lit);
    }

    /// Assert that parsing `lit` fails with exactly the error `expected`.
    fn assert_parse_err(lit: &str, expected: ParseDecimalError) {
        assert_eq!(str_to_dec(lit), Err(expected), "literal: {:?}", lit);
    }

    #[test]
    fn test_parse_int_lit() {
        assert_parsed("1957945", (1957945, 0));
    }

    #[test]
    fn test_parse_dec_lit() {
        assert_parsed("-17.5", (-175, -1));
    }

    #[test]
    fn test_parse_frac_only_lit() {
        assert_parsed("+.75", (75, -2));
    }

    #[test]
    fn test_parse_int_lit_neg_exp() {
        assert_parsed("17e-5", (17, -5));
    }

    #[test]
    fn test_parse_int_lit_pos_exp() {
        assert_parsed("+217e3", (217, 3));
    }

    #[test]
    fn test_parse_dec_lit_neg_exp() {
        assert_parsed("-533.7e-2", (-5337, -3));
    }

    #[test]
    fn test_parse_dec_lit_pos_exp() {
        assert_parsed("700004.002E13", (700004002, 10));
    }

    #[test]
    fn test_err_empty_str() {
        assert_parse_err("", ParseDecimalError::Empty);
    }

    #[test]
    fn test_err_invalid_lit() {
        for lit in [" ", "+", "-4.33.2", "2.87 e3", "+e3", ".4e3 "] {
            assert_parse_err(lit, ParseDecimalError::Invalid);
        }
    }

    #[test]
    fn test_frac_limit_exceeded() {
        assert_parse_err(
            "0.17295887390016377542",
            ParseDecimalError::FracDigitLimitExceeded,
        );
    }

    #[test]
    fn test_frac_limit_exceeded_with_exp() {
        assert_parse_err(
            "17.493e-36",
            ParseDecimalError::FracDigitLimitExceeded,
        );
    }

    #[test]
    fn test_int_lit_max_val_exceeded() {
        // i128::MAX + 1
        assert_parse_err(
            "170141183460469231731687303715884105728",
            ParseDecimalError::InternalOverflow,
        );
    }

    #[test]
    fn test_dec_lit_max_val_exceeded() {
        // The coefficient (all digits, radix point dropped) is greater than
        // i128::MAX.
        assert_parse_err(
            "1701411834604692317316873037158841058.00",
            ParseDecimalError::InternalOverflow,
        );
    }
}