decimal_rs/
parse.rs

1// Copyright 2021 CoD Technologies Corp.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Decimal parsing utilities.
16
17use crate::convert::MAX_I128_REPR;
18use crate::decimal::{MAX_PRECISION, MAX_SCALE, MIN_SCALE};
19use crate::error::DecimalParseError;
20use crate::Decimal;
21use std::convert::TryInto;
22use std::str::FromStr;
23
24#[derive(Debug, PartialEq)]
25enum Sign {
26    Positive,
27    Negative,
28}
29
30/// The interesting parts of a decimal string.
31#[derive(Debug)]
32struct Parts<'a> {
33    pub sign: Sign,
34    pub integral: &'a [u8],
35    pub fractional: &'a [u8],
36    pub exp: i16,
37}
38
39/// Splits a decimal string bytes into sign and the rest, without inspecting or validating the rest.
40#[inline]
41fn extract_sign(s: &[u8]) -> (Sign, &[u8]) {
42    match s.first() {
43        Some(b'+') => (Sign::Positive, &s[1..]),
44        Some(b'-') => (Sign::Negative, &s[1..]),
45        _ => (Sign::Positive, s),
46    }
47}
48
49/// Carves off decimal digits up to the first non-digit character.
50#[inline]
51fn eat_digits(s: &[u8]) -> (&[u8], &[u8]) {
52    let i = s.iter().take_while(|&i| i.is_ascii_digit()).count();
53    (&s[..i], &s[i..])
54}
55
56/// Extracts exponent, if any.
57fn extract_exponent(s: &[u8], decimal_is_zero: bool) -> Result<(i16, &[u8]), DecimalParseError> {
58    let (sign, s) = extract_sign(s);
59    let (mut number, s) = eat_digits(s);
60
61    if number.is_empty() {
62        return Err(DecimalParseError::Invalid);
63    }
64
65    if decimal_is_zero {
66        return Ok((0, s));
67    }
68
69    while number.first() == Some(&b'0') {
70        number = &number[1..];
71    }
72
73    if number.len() > 3 {
74        return match sign {
75            Sign::Positive => Err(DecimalParseError::Overflow),
76            Sign::Negative => Err(DecimalParseError::Underflow),
77        };
78    }
79
80    let exp = {
81        let mut result: i16 = 0;
82        for &n in number {
83            result = result * 10 + (n - b'0') as i16;
84        }
85        match sign {
86            Sign::Positive => result,
87            Sign::Negative => -result,
88        }
89    };
90
91    Ok((exp, s))
92}
93
94/// Checks if the input string is a valid decimal and if so, locate the integral
95/// part, the fractional part, and the exponent in it.
96fn parse_decimal(s: &[u8]) -> Result<(Parts, &[u8]), DecimalParseError> {
97    let (sign, s) = extract_sign(s);
98
99    if s.is_empty() {
100        return Err(DecimalParseError::Invalid);
101    }
102
103    let (mut integral, s) = eat_digits(s);
104
105    while integral.first() == Some(&b'0') && integral.len() > 1 {
106        integral = &integral[1..];
107    }
108
109    let (fractional, exp, s) = match s.first() {
110        Some(&b'e') | Some(&b'E') => {
111            if integral.is_empty() {
112                return Err(DecimalParseError::Invalid);
113            }
114
115            let decimal_is_zero = integral[0] == b'0';
116            let (exp, s) = extract_exponent(&s[1..], decimal_is_zero)?;
117            (&b""[..], exp, s)
118        }
119        Some(&b'.') => {
120            let (mut fractional, s) = eat_digits(&s[1..]);
121            if integral.is_empty() && fractional.is_empty() {
122                return Err(DecimalParseError::Invalid);
123            }
124
125            while fractional.last() == Some(&b'0') {
126                fractional = &fractional[0..fractional.len() - 1];
127            }
128
129            match s.first() {
130                Some(&b'e') | Some(&b'E') => {
131                    let decimal_is_zero = (integral.is_empty() || integral[0] == b'0') && fractional.is_empty();
132                    let (exp, s) = extract_exponent(&s[1..], decimal_is_zero)?;
133                    (fractional, exp, s)
134                }
135                _ => (fractional, 0, s),
136            }
137        }
138        _ => {
139            if integral.is_empty() {
140                return Err(DecimalParseError::Invalid);
141            }
142
143            (&b""[..], 0, s)
144        }
145    };
146
147    Ok((
148        Parts {
149            sign,
150            integral,
151            fractional,
152            exp,
153        },
154        s,
155    ))
156}
157
158/// Carves off whitespaces up to the first non-whitespace character.
159#[inline]
160fn eat_whitespaces(s: &[u8]) -> &[u8] {
161    let i = s.iter().take_while(|&i| i.is_ascii_whitespace()).count();
162    &s[i..]
163}
164
165/// Extracts `NaN` value.
166#[inline]
167fn extract_nan(s: &[u8]) -> (bool, &[u8]) {
168    if s.len() < 3 {
169        (false, s)
170    } else {
171        let mut buf: [u8; 3] = s[0..3].try_into().unwrap();
172        buf.make_ascii_lowercase();
173        if &buf == b"nan" {
174            (true, &s[3..])
175        } else {
176            (false, s)
177        }
178    }
179}
180
181/// Parses a string bytes and put the number into this variable.
182///
183/// This function does not handle leading or trailing spaces, and it doesn't
184/// accept `NaN` either. It returns the remaining string bytes so that caller can
185/// check for trailing spaces/garbage if deemed necessary.
186#[inline]
187fn parse_str(s: &[u8]) -> Result<(Decimal, &[u8]), DecimalParseError> {
188    let (
189        Parts {
190            sign,
191            integral,
192            fractional,
193            exp,
194        },
195        s,
196    ) = parse_decimal(s)?;
197
198    let mut integral = integral;
199    let mut fractional = fractional;
200    let mut scale = -exp;
201
202    let mut carry = false;
203    const MAX_PRECISION_USIZE: usize = MAX_PRECISION as usize;
204
205    // normalized_exp is the exponent of a number with the format `0.{fractional}E{exponent}`, and the first digit of `fractional` is not 0.
206    // Suppose `a = 123.456e12`, convert `a` to the format above and get `0.123456e15`, then the normalized_exp of a is 15.
207    let mut normalized_exp = exp;
208
209    if integral == b"0" {
210        // fractional only
211        let zero_count = fractional.iter().take_while(|i| **i == b'0').count();
212        normalized_exp -= zero_count as i16;
213
214        let max_fractional_precision = MAX_PRECISION_USIZE + zero_count;
215        if fractional.len() > max_fractional_precision {
216            carry = fractional[max_fractional_precision] > b'4';
217            fractional = &fractional[0..max_fractional_precision];
218        }
219
220        debug_assert!(fractional.len() <= max_fractional_precision);
221    } else {
222        let int_len = integral.len() as i16;
223        normalized_exp += int_len;
224
225        if int_len > MAX_PRECISION_USIZE as i16 {
226            carry = integral[MAX_PRECISION_USIZE] > b'4';
227            scale -= int_len - MAX_PRECISION_USIZE as i16;
228
229            integral = &integral[0..MAX_PRECISION_USIZE];
230            fractional = &[];
231        } else {
232            let max_fractional_precision = MAX_PRECISION_USIZE - int_len as usize;
233            if fractional.len() > max_fractional_precision {
234                carry = fractional[max_fractional_precision] > b'4';
235                fractional = &fractional[0..max_fractional_precision];
236            }
237
238            debug_assert!(fractional.len() <= max_fractional_precision);
239        }
240    };
241
242    let mut int = 0u128;
243    for &i in integral {
244        int = int * 10 + (i - b'0') as u128;
245    }
246    for &i in fractional {
247        int = int * 10 + (i - b'0') as u128;
248    }
249    // So far, `int` precision does not exceed MAX_PRECISION.
250
251    int += carry as u128;
252    if int > MAX_I128_REPR as u128 {
253        normalized_exp += 1;
254        int /= 10;
255        scale -= 1;
256    }
257
258    if normalized_exp <= -MAX_SCALE {
259        return Err(DecimalParseError::Underflow);
260    }
261    if normalized_exp > -MIN_SCALE {
262        return Err(DecimalParseError::Overflow);
263    }
264
265    let negative = if int != 0 { sign == Sign::Negative } else { false };
266
267    scale += fractional.len() as i16;
268    Ok((unsafe { Decimal::from_parts_unchecked(int, scale, negative) }, s))
269}
270
271/// Parses a string slice and creates a decimal.
272///
273/// This function handles leading or trailing spaces, and it
274/// accepts `NaN` either.
275#[inline]
276fn from_str(s: &str) -> Result<Decimal, DecimalParseError> {
277    let s = s.as_bytes();
278    let s = eat_whitespaces(s);
279    if s.is_empty() {
280        return Err(DecimalParseError::Empty);
281    }
282
283    let (is_nan, s) = extract_nan(s);
284
285    if is_nan {
286        Err(DecimalParseError::Invalid)
287    } else {
288        let (n, s) = parse_str(s)?;
289
290        if s.iter().any(|n| !n.is_ascii_whitespace()) {
291            return Err(DecimalParseError::Invalid);
292        }
293
294        Ok(n)
295    }
296}
297
298impl FromStr for Decimal {
299    type Err = DecimalParseError;
300
301    #[inline]
302    fn from_str(s: &str) -> Result<Self, Self::Err> {
303        from_str(s)
304    }
305}
306
307#[cfg(test)]
308mod tests {
309    use super::*;
310
311    fn assert_parse_empty<S: AsRef<str>>(s: S) {
312        let result = s.as_ref().parse::<Decimal>();
313        assert_eq!(result.unwrap_err(), DecimalParseError::Empty);
314    }
315
316    fn assert_parse_invalid<S: AsRef<str>>(s: S) {
317        let result = s.as_ref().parse::<Decimal>();
318        assert_eq!(result.unwrap_err(), DecimalParseError::Invalid);
319    }
320
321    fn assert_parse_overflow<S: AsRef<str>>(s: S) {
322        let result = s.as_ref().parse::<Decimal>();
323        assert_eq!(result.unwrap_err(), DecimalParseError::Overflow);
324    }
325
326    fn assert_parse_underflow<S: AsRef<str>>(s: S) {
327        let result = s.as_ref().parse::<Decimal>();
328        assert_eq!(result.unwrap_err(), DecimalParseError::Underflow);
329    }
330
331    #[test]
332    fn test_parse_error() {
333        assert_parse_empty("");
334        assert_parse_empty("   ");
335        assert_parse_invalid("-");
336        assert_parse_invalid("   -   ");
337        assert_parse_invalid("-.");
338        assert_parse_invalid("- 1");
339        assert_parse_invalid("-NaN");
340        assert_parse_invalid("NaN.");
341        assert_parse_invalid("NaN1");
342        assert_parse_invalid("   NaN   .   ");
343        assert_parse_invalid("   NaN   1   ");
344        assert_parse_invalid(".");
345        assert_parse_invalid("   .   ");
346        assert_parse_invalid("e");
347        assert_parse_invalid("   e   ");
348        assert_parse_invalid("-e");
349        assert_parse_invalid("-1e");
350        assert_parse_invalid("1e1.1");
351        assert_parse_invalid("-1 e1");
352        assert_parse_invalid("   x   ");
353        assert_parse_overflow("1e1000");
354        assert_parse_overflow("1e100000");
355        assert_parse_overflow("1e127");
356        assert_parse_underflow("1e-131");
357        assert_parse_underflow("1e-1000");
358        assert_parse_underflow("1e-100000");
359    }
360
361    fn assert_parse<S: AsRef<str>, V: AsRef<str>>(s: S, expected: V) {
362        let decimal = s.as_ref().parse::<Decimal>().unwrap();
363        assert_eq!(decimal.to_string(), expected.as_ref());
364    }
365
366    #[test]
367    fn test_parse_valid() {
368        // Integer
369        assert_parse("0", "0");
370        assert_parse("-0", "0");
371        assert_parse("   -0   ", "0");
372        assert_parse("00000.", "0");
373        assert_parse("-00000.", "0");
374        assert_parse("128", "128");
375        assert_parse("-128", "-128");
376        assert_parse("65536", "65536");
377        assert_parse("-65536", "-65536");
378        assert_parse("4294967296", "4294967296");
379        assert_parse("-4294967296", "-4294967296");
380        assert_parse("18446744073709551616", "18446744073709551616");
381        assert_parse("-18446744073709551616", "-18446744073709551616");
382        assert_parse(
383            "99999999999999999999999999999999999999",
384            "99999999999999999999999999999999999999",
385        );
386        assert_parse(
387            "0099999999999999999999999999999999999999",
388            "99999999999999999999999999999999999999",
389        );
390        assert_parse(
391            "-99999999999999999999999999999999999999",
392            "-99999999999999999999999999999999999999",
393        );
394        assert_parse("000000000123", "123");
395        assert_parse("-000000000123", "-123");
396        assert_parse(
397            "170141183460469231713240559642175554110",
398            "170141183460469231713240559642175554110",
399        );
400        assert_parse(
401            "999999999999999999999999999999999999990000000000",
402            "999999999999999999999999999999999999990000000000",
403        );
404
405        // Floating-point number
406        assert_parse("0.0", "0");
407        assert_parse("-0.0", "0");
408        assert_parse("   -0.0   ", "0");
409        assert_parse(".0", "0");
410        assert_parse(".00000", "0");
411        assert_parse("-.0", "0");
412        assert_parse("-.00000", "0");
413        assert_parse("128.128", "128.128");
414        assert_parse("-128.128", "-128.128");
415        assert_parse("65536.65536", "65536.65536");
416        assert_parse("-65536.65536", "-65536.65536");
417        assert_parse("4294967296.4294967296", "4294967296.4294967296");
418        assert_parse("-4294967296.4294967296", "-4294967296.4294967296");
419        assert_parse(
420            "9999999999999999999.9999999999999999999",
421            "9999999999999999999.9999999999999999999",
422        );
423        assert_parse(
424            "-9999999999999999999.9999999999999999999",
425            "-9999999999999999999.9999999999999999999",
426        );
427        assert_parse("000000000123.000000000123", "123.000000000123");
428        assert_parse("-000000000123.000000000123", "-123.000000000123");
429        assert_parse(
430            "00.000000000000000000000000000000000000123",
431            "0.000000000000000000000000000000000000123",
432        );
433        assert_parse("00.000000000000000000000000000000000000123e-87", "0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000123");
434        assert_parse("99999999999999999999999999999999999999500000000000000000000000000000000000000000000000000000000000000000000000000000000000000", "100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000");
435
436        // Scientific notation
437        assert_parse("0e0", "0");
438        assert_parse("-0E-0", "0");
439        assert_parse("0000000000E0000000000", "0");
440        assert_parse("-0000000000E-0000000000", "0");
441        assert_parse("00000000001e0000000000", "1");
442        assert_parse("-00000000001e-0000000000", "-1");
443        assert_parse("00000000001e00000000001", "10");
444        assert_parse("-00000000001e-00000000001", "-0.1");
445        assert_parse("1e10", "10000000000");
446        assert_parse("-1e-10", "-0.0000000001");
447        assert_parse("0000001.23456000e3", "1234.56");
448        assert_parse("-0000001.23456000E-3", "-0.00123456");
449        assert_parse("0e999", "0");
450        assert_parse("0e+99999", "0");
451        assert_parse("0e9999999", "0");
452        assert_parse("0.e999", "0");
453        assert_parse("0.e+99999", "0");
454        assert_parse("0.e9999999", "0");
455        assert_parse("0.0e999", "0");
456        assert_parse("0.0e+99999", "0");
457        assert_parse("0.0e9999999", "0");
458        assert_parse("0.0000e999", "0");
459        assert_parse("0.0000e+99999", "0");
460        assert_parse("0.0000e9999999", "0");
461        assert_parse(".000e999", "0");
462        assert_parse(".000e+99999", "0");
463        assert_parse(".000e9999999", "0");
464    }
465
466    #[test]
467    fn test_parse_boundary() {
468        assert_parse("100E-131", "0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100");
469        assert_parse("0.000012345E130", "123450000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000");
470        assert_parse("4.94065645841247E-126", "0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000494065645841247");
471        assert_parse("1234.94065645841247E-126", "0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000123494065645841247");
472        assert_parse("12345678987654321999999E-132", "0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000012345678987654321999999");
473        assert_parse("10000000000000000000000000000000000000e88", "100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000");
474        assert_parse("0.999999999999999999999999999999999999995e-130", "0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000");
475        assert_parse_underflow("0.999999999999999999999999999999999999995e-131");
476        assert_parse_overflow("999999999999999999999999999999999999995000000000000000000000000000000000000000000000000000000000000000000000000000000000000000");
477    }
478
479    #[test]
480    fn test_parse_over_precision_but_valid() {
481        // integer only
482        assert_parse(
483            "999999999999999999999999999999999999999",
484            "1000000000000000000000000000000000000000",
485        );
486        assert_parse(
487            "900719925474099290071992547409929007112123123123123",
488            "900719925474099290071992547409929007110000000000000",
489        );
490
491        // fractional only
492        assert_parse(
493            "0.123123123123123135555555555555555555555555555555",
494            "0.12312312312312313555555555555555555556",
495        );
496        assert_parse(
497            "0.0000000123123123123123135555555555555555555555555555555",
498            "0.000000012312312312312313555555555555555555556",
499        );
500        assert_parse(
501            "0.0000000123123123123123135555555555555515555555555555555",
502            "0.000000012312312312312313555555555555551555556",
503        );
504        assert_parse(
505            "0.0000000123123123123123135555555555555565555551555555555",
506            "0.000000012312312312312313555555555555556555555",
507        );
508
509        // integer over precision
510        assert_parse(
511            "1231231231231231231231231255555555555555555555.123",
512            "1231231231231231231231231255555555555600000000",
513        );
514
515        // integer + fractional over precision
516        assert_parse(
517            "123123.5555555555555555555555555555555555555555",
518            "123123.55555555555555555555555555555556",
519        );
520
521        assert_parse_overflow("90071992547409929007199254740992900711212312312312312312312312312311111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111");
522    }
523}