fpdec_core/
parser.rs

1// ---------------------------------------------------------------------------
2// Copyright:   (c) 2021 ff. Michael Amrhein (michael@adrhinum.de)
3// License:     This program is part of a larger application. For license
4//              details please read the file LICENSE.TXT provided together
5//              with the application.
6// ---------------------------------------------------------------------------
7// $Source: fpdec-core/src/parser.rs $
8// $Revision: 2023-05-12T15:46:50+02:00 $
9
10use core::{
11    fmt::{Display, Formatter},
12    ptr,
13};
14
15/// An error which can be returned when parsing a decimal literal.
16///
17/// This error is used as the error type for the `FromStr` implementation of
18/// `Decimal`.
19#[derive(Debug, Clone, PartialEq, Eq)]
20pub enum ParseDecimalError {
21    /// An empty string has been given as literal.
22    Empty,
23    /// The given string is not a valid decimal literal.
24    Invalid,
25    /// The given decimal literal has more fractional digits than specified
26    /// by `MAX_N_FRAC_DIGITS`.
27    FracDigitLimitExceeded,
28    /// The given decimal literal would exceed the internal representation of
29    /// `Decimal`.
30    InternalOverflow,
31}
32
33impl ParseDecimalError {
34    #[doc(hidden)]
35    #[must_use]
36    pub const fn _description(&self) -> &str {
37        match self {
38            Self::Empty => "Empty string.",
39            Self::Invalid => "Invalid decimal string literal.",
40            Self::FracDigitLimitExceeded => "Too many fractional digits.",
41            Self::InternalOverflow => "Internal representation exceeded.",
42        }
43    }
44}
45
46impl Display for ParseDecimalError {
47    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
48        Display::fmt(self._description(), f)
49    }
50}
51
52#[cfg(feature = "std")]
53impl std::error::Error for ParseDecimalError {}
54
55/// Check whether an u64 is holding 8 decimal digits.
56const fn chunk_contains_8_digits(chunk: u64) -> bool {
57    // Subtract b'0' from each byte.
58    let x = chunk.wrapping_sub(0x3030303030303030);
59    // Add 0x46 (= 0x7f - b'9') to each byte.
60    let y = chunk.wrapping_add(0x4646464646464646);
61    // In x now all original bytes < b'0' have the highest bit set, and
62    // in y now all original bytes > b'9' are > 0x7f.
63    // Then, in x|y all original bytes besides b'0' .. b'9' are > 0x7f.
64    // Thus, bitwise-and with 0x80 gives 0 for all original bytes b'0' .. b'9'
65    // and 0x7f for all others.
66    (x | y) & 0x8080808080808080 == 0
67}
68
69/// Convert an u64 holding a sequence of 8 decimal digits into an u64.
70const fn chunk_to_u64(mut chunk: u64) -> u64 {
71    // The following is adopted from Johnny Lee: Fast numeric string to int
72    // [https://johnnylee-sde.github.io/Fast-numeric-string-to-int].
73    chunk &= 0x0f0f0f0f0f0f0f0f;
74    chunk = (chunk & 0x000f000f000f000f)
75        .wrapping_mul(10)
76        .wrapping_add((chunk >> 8) & 0x000f000f000f000f);
77    chunk = (chunk & 0x0000007f0000007f)
78        .wrapping_mul(100)
79        .wrapping_add((chunk >> 16) & 0x0000007f0000007f);
80    (chunk & 0x3fff)
81        .wrapping_mul(10000)
82        .wrapping_add((chunk >> 32) & 0x3fff)
83}
84
85// Bytes wrapper specialized for parsing decimal number literals
86struct AsciiDecLit<'a> {
87    bytes: &'a [u8],
88}
89
90impl<'a> AsciiDecLit<'a> {
91    const fn new(bytes: &'a [u8]) -> Self {
92        Self { bytes }
93    }
94
95    const fn is_empty(&self) -> bool {
96        self.bytes.is_empty()
97    }
98
99    const fn len(&self) -> usize {
100        self.bytes.len()
101    }
102
103    /// self <- self[n..]
104    unsafe fn skip_n(&mut self, n: usize) -> &mut Self {
105        debug_assert!(self.bytes.len() >= n);
106        self.bytes = self.bytes.get_unchecked(n..);
107        self
108    }
109
110    /// self <- self[n..]
111    unsafe fn skip_1(&mut self) -> &mut Self {
112        self.skip_n(1)
113    }
114
115    const fn first(&self) -> Option<&u8> {
116        self.bytes.first()
117    }
118
119    fn first_eq(&self, b: u8) -> bool {
120        Some(&b) == self.first()
121    }
122
123    const fn first_is_digit(&self) -> bool {
124        matches!(self.first(), Some(c) if c.wrapping_sub(b'0') < 10)
125    }
126
127    fn skip_leading_zeroes(&mut self) -> &mut Self {
128        while self.first_eq(b'0') {
129            // Safety: safe because of condition above!
130            unsafe {
131                self.skip_1();
132            };
133        }
134        self
135    }
136
137    // Read 8 bytes as u64 (little-endian).
138    unsafe fn read_u64_unchecked(&self) -> u64 {
139        debug_assert!(self.bytes.len() >= 8);
140        let src = self.bytes.as_ptr() as *const u64;
141        u64::from_le(ptr::read_unaligned(src))
142    }
143
144    // Try to read the next 8 bytes from self.
145    fn read_u64(&self) -> Option<u64> {
146        (self.len() >= 8).then(||
147            // Safety: safe because of condition above!
148            unsafe { self.read_u64_unchecked() })
149    }
150
151    /// Convert the leading sequence of decimal digits in `self` (if any) into
152    /// an int and accumulate it into `coeff`.
153    // The function uses wrapping_mul and wrapping_add, so overflow can
154    // happen; it must be checked later!
155    fn accum_coeff(&mut self, coeff: &mut u128) -> usize {
156        let start_len = self.len();
157        // First, try chunks of 8 digits
158        while let Some(k) = self.read_u64() {
159            if chunk_contains_8_digits(k) {
160                *coeff = coeff
161                    .wrapping_mul(100000000)
162                    .wrapping_add(chunk_to_u64(k) as u128);
163                // Safety: safe because of call to self.read_u64 above
164                unsafe {
165                    self.skip_n(8);
166                }
167            } else {
168                break;
169            }
170        }
171        // Handle remaining digits
172        while let Some(c) = self.first() {
173            let d = c.wrapping_sub(b'0');
174            if d < 10 {
175                *coeff = coeff.wrapping_mul(10).wrapping_add(d as u128);
176                // Safety: safe because of call to self.first above
177                unsafe {
178                    self.skip_1();
179                }
180            } else {
181                break;
182            }
183        }
184        start_len - self.len()
185    }
186
187    /// Convert the leading sequence of decimal digits in `self` (if any) into
188    /// an int and accumulate it into `exp`.
189    // The function uses wrapping_mul and wrapping_add, but overflow is
190    // prevented by limiting the result to a value which will cause an error
191    // later!
192    fn accum_exp(&mut self, exp: &mut isize) -> usize {
193        let start_len = self.len();
194        while let Some(c) = self.first() {
195            let d = c.wrapping_sub(b'0');
196            if d < 10 {
197                if *exp < 0x1000000 {
198                    *exp = exp.wrapping_mul(10).wrapping_add(d as isize);
199                }
200                // Safety: safe because of call to self.first above
201                unsafe {
202                    self.skip_1();
203                }
204            } else {
205                break;
206            }
207        }
208        start_len - self.len()
209    }
210}
211
212/// Convert a decimal number literal into a representation in the form
213/// (coefficient, exponent), so that number == coefficient * 10 ^ exponent.
214///
215/// The literal must be in the form
216///
217/// `[+|-]<int>[.<frac>][<e|E>[+|-]<exp>]`
218///
219/// or
220///
221/// `[+|-].<frac>[<e|E>[+|-]<exp>]`.
222#[doc(hidden)]
223pub fn str_to_dec(lit: &str) -> Result<(i128, isize), ParseDecimalError> {
224    let mut lit = AsciiDecLit::new(lit.as_ref());
225    let is_negative = match lit.first() {
226        None => {
227            return Err(ParseDecimalError::Empty);
228        }
229        Some(&c) if c == b'-' => {
230            // Safety: safe because of match
231            unsafe { lit.skip_1() };
232            true
233        }
234        Some(&c) if c == b'+' => {
235            // Safety: safe because of match
236            unsafe { lit.skip_1() };
237            false
238        }
239        _ => false,
240    };
241    if lit.is_empty() {
242        return Err(ParseDecimalError::Invalid);
243    }
244    lit.skip_leading_zeroes();
245    if lit.is_empty() {
246        // There must have been atleast one zero. Ignore sign.
247        return Ok((0, 0));
248    }
249    let mut coeff = 0_u128;
250    // Parse integral digits.
251    let n_int_digits = lit.accum_coeff(&mut coeff);
252    // Check for radix point and parse fractional digits.
253    let mut n_frac_digits = 0_usize;
254    if let Some(c) = lit.first() {
255        if *c == b'.' {
256            // Safety: safe because of condition above
257            unsafe { lit.skip_1() };
258            n_frac_digits = lit.accum_coeff(&mut coeff);
259        }
260    }
261    let n_digits = n_int_digits + n_frac_digits;
262    if n_digits == 0 {
263        return Err(ParseDecimalError::Invalid);
264    }
265    // check for overflow
266    // 1. 10^e > i128::MAX for e > 39
267    // 2. e = 39 && coeff < 10³⁸ (overflow occured during accumulation)
268    // 3. coeff > i128::MAX
269    if n_digits > 39
270        || n_digits == 39
271            && coeff < 100000000000000000000000000000000000000_u128
272        || coeff > i128::MAX as u128
273    {
274        return Err(ParseDecimalError::InternalOverflow);
275    }
276    let mut exp = 0_isize;
277    // check for explicit exponent
278    if let Some(c) = lit.first() {
279        if *c == b'e' || *c == b'E' {
280            // Safety: safe because of condition above
281            unsafe { lit.skip_1() };
282            let exp_is_negative = match lit.first() {
283                None => {
284                    return Err(ParseDecimalError::Invalid);
285                }
286                Some(&c) if c == b'-' => {
287                    // Safety: safe because of match
288                    unsafe { lit.skip_1() };
289                    true
290                }
291                Some(&c) if c == b'+' => {
292                    // Safety: safe because of match
293                    unsafe { lit.skip_1() };
294                    false
295                }
296                _ => false,
297            };
298            let n_exp_digits = lit.accum_exp(&mut exp);
299            if exp_is_negative {
300                exp = -exp;
301            }
302            if n_exp_digits > 2 {
303                return Err(ParseDecimalError::FracDigitLimitExceeded);
304            }
305        } else {
306            return Err(ParseDecimalError::Invalid);
307        }
308    }
309    if !lit.is_empty() {
310        return Err(ParseDecimalError::Invalid);
311    }
312    exp -= n_frac_digits as isize;
313    if -exp > crate::MAX_N_FRAC_DIGITS as isize {
314        return Err(ParseDecimalError::FracDigitLimitExceeded);
315    }
316    if is_negative {
317        Ok((-(coeff as i128), exp))
318    } else {
319        Ok((coeff as i128, exp))
320    }
321}
322
323#[cfg(test)]
324mod tests {
325    use super::*;
326    // use crate::str2dec;
327
328    #[test]
329    fn test_parse_int_lit() {
330        let res = str_to_dec("1957945").unwrap();
331        assert_eq!(res, (1957945, 0));
332    }
333
334    #[test]
335    fn test_parse_dec_lit() {
336        let res = str_to_dec("-17.5").unwrap();
337        assert_eq!(res, (-175, -1));
338    }
339
340    #[test]
341    fn test_parse_frac_only_lit() {
342        let res = str_to_dec("+.75").unwrap();
343        assert_eq!(res, (75, -2));
344    }
345
346    #[test]
347    fn test_parse_int_lit_neg_exp() {
348        let res = str_to_dec("17e-5").unwrap();
349        assert_eq!(res, (17, -5));
350    }
351
352    #[test]
353    fn test_parse_int_lit_pos_exp() {
354        let res = str_to_dec("+217e3").unwrap();
355        assert_eq!(res, (217, 3));
356    }
357
358    #[test]
359    fn test_parse_dec_lit_neg_exp() {
360        let res = str_to_dec("-533.7e-2").unwrap();
361        assert_eq!(res, (-5337, -3));
362    }
363
364    #[test]
365    fn test_parse_dec_lit_pos_exp() {
366        let res = str_to_dec("700004.002E13").unwrap();
367        assert_eq!(res, (700004002, 10));
368    }
369
370    #[test]
371    fn test_err_empty_str() {
372        let res = str_to_dec("");
373        assert!(res.is_err());
374        let err = res.unwrap_err();
375        assert_eq!(err, ParseDecimalError::Empty);
376    }
377
378    #[test]
379    fn test_err_invalid_lit() {
380        let lits = [" ", "+", "-4.33.2", "2.87 e3", "+e3", ".4e3 "];
381        for lit in lits {
382            let res = str_to_dec(lit);
383            assert!(res.is_err());
384            let err = res.unwrap_err();
385            assert_eq!(err, ParseDecimalError::Invalid);
386        }
387    }
388
389    #[test]
390    fn test_frac_limit_exceeded() {
391        let res = str_to_dec("0.17295887390016377542");
392        assert!(res.is_err());
393        let err = res.unwrap_err();
394        assert_eq!(err, ParseDecimalError::FracDigitLimitExceeded);
395    }
396
397    #[test]
398    fn test_frac_limit_exceeded_with_exp() {
399        let res = str_to_dec("17.493e-36");
400        assert!(res.is_err());
401        let err = res.unwrap_err();
402        assert_eq!(err, ParseDecimalError::FracDigitLimitExceeded);
403    }
404
405    #[test]
406    fn test_int_lit_max_val_exceeded() {
407        let s = "170141183460469231731687303715884105728";
408        let res = str_to_dec(s);
409        assert!(res.is_err());
410        let err = res.unwrap_err();
411        assert_eq!(err, ParseDecimalError::InternalOverflow);
412    }
413
414    #[test]
415    fn test_dec_lit_max_val_exceeded() {
416        let s = "1701411834604692317316873037158841058.00";
417        let res = str_to_dec(s);
418        assert!(res.is_err());
419        let err = res.unwrap_err();
420        assert_eq!(err, ParseDecimalError::InternalOverflow);
421    }
422}