astro_float_num/
parser.rs

//! Parser for numbers written in scientific notation, including the special values `inf` and `nan`.
2
3use crate::defs::Exponent;
4use crate::defs::Sign;
5use crate::defs::EXPONENT_MAX;
6use crate::Error;
7use crate::Radix;
8use crate::EXPONENT_MIN;
9use core::str::Chars;
10
11#[cfg(not(feature = "std"))]
12use alloc::vec::Vec;
13
14pub struct ParserState<'a> {
15    chars: Chars<'a>,
16    cur_ch: Option<char>,
17    s_len: usize,
18    sign: Sign,
19    mantissa_bytes: Vec<u8>,
20    e: isize,
21    inf: bool,
22    nan: bool,
23}
24
25impl<'a> ParserState<'a> {
26    fn new(s: &'a str) -> Self {
27        ParserState {
28            chars: s.chars(),
29            s_len: s.len(),
30            cur_ch: None,
31            sign: Sign::Pos,
32            mantissa_bytes: Vec::new(),
33            e: 0,
34            inf: false,
35            nan: true,
36        }
37    }
38
39    // Returns next character of a string in lower case,
40    // or None if string end reached.
41    fn next_char(&mut self) -> Option<char> {
42        self.cur_ch = self.chars.next().map(|c| c.to_ascii_lowercase());
43        self.cur_ch
44    }
45
46    fn cur_char(&self) -> Option<char> {
47        self.cur_ch
48    }
49
50    pub fn is_inf(&self) -> bool {
51        self.inf
52    }
53
54    pub fn is_nan(&self) -> bool {
55        self.nan
56    }
57
58    pub fn sign(&self) -> Sign {
59        self.sign
60    }
61
62    /// Returns mantissa digits, mantissa length, sign, exponent.
63    pub fn raw_parts(&self) -> (&[u8], Sign, Exponent) {
64        (&self.mantissa_bytes, self.sign, self.e as Exponent)
65    }
66}
67
68/// Parse BigFloat.
69pub fn parse(s: &str, rdx: Radix) -> Result<ParserState, Error> {
70    let mut parser_state = ParserState::new(s);
71    let mut ch = parser_state.next_char();
72
73    // sign
74    if let Some(c) = ch {
75        match c {
76            '+' => ch = parser_state.next_char(),
77            '-' => {
78                parser_state.sign = Sign::Neg;
79                ch = parser_state.next_char()
80            }
81            _ => {}
82        };
83    }
84
85    if let Some(c) = ch {
86        match (c, rdx) {
87            ('i', _) => parse_inf(&mut parser_state),
88            ('n', _) => parse_nan(&mut parser_state),
89            ('.' | '0' | '1', Radix::Bin) => parse_num(&mut parser_state, rdx)?,
90            ('.' | '0'..='7', Radix::Oct) => parse_num(&mut parser_state, rdx)?,
91            ('.' | '0'..='9', Radix::Dec) => parse_num(&mut parser_state, rdx)?,
92            ('.' | '0'..='9' | 'a'..='f', Radix::Hex) => parse_num(&mut parser_state, rdx)?,
93            _ => {}
94        };
95    }
96
97    Ok(parser_state)
98}
99
100fn parse_inf(parser_state: &mut ParserState) {
101    let n = parser_state.next_char();
102    let f = parser_state.next_char();
103    if Some('n') == n && Some('f') == f {
104        parser_state.inf = true;
105        parser_state.nan = false;
106    }
107}
108
109fn parse_nan(parser_state: &mut ParserState) {
110    let a = parser_state.next_char();
111    let n = parser_state.next_char();
112    if Some('n') == n && Some('a') == a {
113        parser_state.nan = true;
114    }
115}
116
117fn parse_num(parser_state: &mut ParserState, rdx: Radix) -> Result<(), Error> {
118    let (int_len, skip_cnt1) = parse_digits(parser_state, true, true, rdx)?;
119    if Some('.') == parser_state.cur_char() {
120        parser_state.next_char();
121    }
122    let (frac_len, _) = parse_digits(parser_state, false, false, rdx)?;
123    if frac_len > 0 || int_len > 0 {
124        parser_state.nan = false;
125        if rdx == Radix::Hex {
126            if Some('_') == parser_state.cur_char() {
127                parser_state.next_char();
128                if Some('e') == parser_state.cur_char() {
129                    parser_state.next_char();
130                    parse_exp(parser_state, rdx);
131                }
132            }
133        } else if Some('e') == parser_state.cur_char() {
134            parser_state.next_char();
135            parse_exp(parser_state, rdx);
136        }
137        if int_len != 0 {
138            parser_state.e = parser_state.e.saturating_add(int_len as isize);
139        }
140
141        if parser_state.e < EXPONENT_MIN as isize {
142            let mut zero = Vec::new();
143            zero.try_reserve_exact(1)?;
144            zero.push(0);
145            parser_state.mantissa_bytes = zero;
146            parser_state.e = 0;
147        } else if parser_state.e > EXPONENT_MAX as isize {
148            parser_state.inf = true;
149        }
150    } else if skip_cnt1 > 0 {
151        // just zeroes
152        parser_state.nan = false;
153    }
154
155    Ok(())
156}
157
158fn parse_digits(
159    parser_state: &mut ParserState,
160    skip_zeroes: bool,
161    int: bool,
162    rdx: Radix,
163) -> Result<(usize, usize), Error> {
164    let mut ch = parser_state.cur_char();
165    let mut len = 0;
166    let mut skip_cnt = 0;
167
168    if skip_zeroes {
169        // skip leading zeroes
170        while let Some(c) = ch {
171            if is_radix_digit(c, rdx) && c.to_digit(rdx as u32).unwrap() == 0 {
172                // call to unwrap() is unreachable, because c is surely a digit.
173                skip_cnt += 1;
174                if !int {
175                    len += 1; // for fractional part count length
176                }
177            } else {
178                break;
179            }
180            ch = parser_state.next_char();
181        }
182    }
183
184    if ch.is_some() && is_radix_digit(ch.unwrap(), rdx) {
185        parser_state
186            .mantissa_bytes
187            .try_reserve_exact(parser_state.s_len)?;
188
189        while let Some(c) = ch {
190            if is_radix_digit(c, rdx) {
191                parser_state
192                    .mantissa_bytes
193                    .push(c.to_digit(rdx as u32).unwrap() as u8); // call to unwrap() is unreachable, because c is surely a digit.
194                len += 1;
195            } else {
196                break;
197            }
198            ch = parser_state.next_char();
199        }
200    }
201
202    if !int && skip_cnt == len {
203        // just zeroes
204        len = 0;
205    }
206
207    Ok((len, skip_cnt))
208}
209
210fn is_radix_digit(c: char, rdx: Radix) -> bool {
211    matches!(
212        (rdx, c),
213        (Radix::Bin, '0' | '1')
214            | (Radix::Oct, '0'..='7')
215            | (
216                Radix::Dec,
217                '0'..='9'
218            )
219            | (
220                Radix::Hex,
221                '0'..='9' | 'a'..='f'
222            )
223    )
224}
225
226fn parse_exp(parser_state: &mut ParserState, rdx: Radix) {
227    let mut neg = false;
228    let mut ch = parser_state.cur_char();
229    if let Some(c) = ch {
230        match c {
231            '+' => {
232                ch = parser_state.next_char();
233            }
234            '-' => {
235                neg = true;
236                ch = parser_state.next_char();
237            }
238            _ => {}
239        };
240    }
241    let e_thres = EXPONENT_MAX.unsigned_abs().max(EXPONENT_MIN.unsigned_abs()) as isize;
242    while let Some(c) = ch {
243        if is_radix_digit(c, rdx) {
244            if parser_state.e > e_thres {
245                break;
246            }
247            parser_state.e = parser_state.e.saturating_mul(rdx as isize);
248            let digit = c.to_digit(rdx as u32).unwrap(); // call to unwrap() is unreachable, because c is surely a digit.
249            parser_state.e = parser_state.e.saturating_add(digit as isize);
250        } else {
251            break;
252        }
253        ch = parser_state.next_char();
254    }
255    if neg {
256        parser_state.e = -parser_state.e;
257    }
258}
259
#[cfg(test)]
mod tests {

    use super::*;

    #[cfg(not(feature = "std"))]
    use {alloc::format, alloc::string::String, alloc::vec};

    /// Checks `parse` on every combination of sign, mantissa and exponent spelling, on the
    /// special values, on each radix, and on exponents at and beyond the allowed range.
    #[test]
    pub fn test_parser() {
        // (mantissa text, expected stored digits, exponent shift caused by the integer part)
        let mantissa_cases: [(&str, Vec<u8>, _); 9] = [
            ("0.0", vec![0], 0),
            ("0", vec![], 0),
            (".000", vec![0, 0, 0], 0),
            ("00.", vec![], 0),
            ("000123", vec![1, 2, 3], 3),
            ("456.", vec![4, 5, 6], 3),
            ("789.012", vec![7, 8, 9, 0, 1, 2], 3),
            (".3456", vec![3, 4, 5, 6], 0),
            ("0.0078", vec![0, 0, 7, 8], 0),
        ];

        // (sign text, expected sign)
        let sign_cases = [("", Sign::Pos), ("+", Sign::Pos), ("-", Sign::Neg)];

        // (exponent text, expected exponent value)
        let exponent_cases = [
            ("", 0),
            ("E", 0),
            ("e", 0),
            ("e123", 123),
            ("e+345", 345),
            ("e-678", -678),
            ("e901", 901),
            ("E+234", 234),
            ("E-567", -567),
        ];

        let infs = ["inf", "INF", "Inf"];
        let nans = ["nan", "NaN", "NAN"];

        // test numbers.
        for &(sign_str, expected_sign) in sign_cases.iter() {
            for (mantissa_str, digits, shift) in mantissa_cases.iter() {
                for &(exp_str, expected_exp) in exponent_cases.iter() {
                    let numstr = String::from(sign_str) + *mantissa_str + exp_str;

                    let ps = parse(&numstr, Radix::Dec).unwrap();

                    assert!(!ps.is_inf());
                    assert!(!ps.is_nan());

                    let (m, s, e) = ps.raw_parts();
                    assert!(s == expected_sign);
                    assert_eq!(m, digits.as_slice());

                    // Without stored digits the exponent is not parsed at all.
                    let expected_e = if digits.is_empty() {
                        0
                    } else {
                        expected_exp + *shift
                    };
                    assert_eq!(e, expected_e);
                }
            }
        }

        // test inf
        for &(sign_str, expected_sign) in sign_cases.iter() {
            for inf in infs {
                let numstr = String::from(sign_str) + inf;

                let ps = parse(&numstr, Radix::Dec).unwrap();

                assert!(ps.is_inf());
                assert!(ps.sign() == expected_sign);
                assert!(!ps.is_nan());
            }
        }

        // test nan
        for nan in nans {
            let ps = parse(nan, Radix::Dec).unwrap();
            assert!(!ps.is_inf());
            assert!(ps.is_nan());
        }

        // bin
        let ps = parse("101.00101e+1101", Radix::Bin).unwrap();
        let (m, s, e) = ps.raw_parts();
        assert_eq!(m, [1, 0, 1, 0, 0, 1, 0, 1]);
        assert!(s == Sign::Pos);
        assert_eq!(e, 16);

        // oct
        let ps = parse("2670.343e+703", Radix::Oct).unwrap();
        let (m, s, e) = ps.raw_parts();
        assert_eq!(m, [2, 6, 7, 0, 3, 4, 3]);
        assert!(s == Sign::Pos);
        assert_eq!(e, 0o707);

        // hex
        let ps = parse("abc.def09123e_e-1fa", Radix::Hex).unwrap();
        let (m, s, e) = ps.raw_parts();
        assert_eq!(m, [10, 11, 12, 13, 14, 15, 0, 9, 1, 2, 3, 14]);
        assert!(s == Sign::Pos);
        assert_eq!(e, -0x1f7);

        // large exp: the exponent text depends on the target architecture.
        #[cfg(not(target_arch = "x86"))]
        let large_exp = "7FFFFFFF";
        #[cfg(target_arch = "x86")]
        let large_exp = "1FFFFFFF";

        let numstr = format!("abc.def09123e_e+{}", large_exp);
        let ps = parse(&numstr, Radix::Hex).unwrap();
        assert!(ps.is_inf());
        assert!(ps.sign().is_positive());

        let numstr = format!("-abc.def09123e_e+{}", large_exp);
        let ps = parse(&numstr, Radix::Hex).unwrap();
        assert!(ps.is_inf());
        assert!(!ps.is_nan());
        assert!(ps.sign().is_negative());

        let ps = parse(
            "-abc.def09123e_e+ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
            Radix::Hex,
        )
        .unwrap();
        assert!(ps.is_inf());
        assert!(!ps.is_nan());
        assert!(ps.sign().is_negative());

        let numstr = format!("0.0000abc_e+{:X}", EXPONENT_MAX);
        let ps = parse(&numstr, Radix::Hex).unwrap();
        assert!(!ps.is_inf());
        assert!(!ps.is_nan());
        let (m, _s, e) = ps.raw_parts();
        assert_eq!(m, [0, 0, 0, 0, 0xa, 0xb, 0xc]);
        assert_eq!(e, EXPONENT_MAX);

        // small exp
        let numstr = format!(
            "abc.def09123e_e-{:x}",
            (EXPONENT_MIN as i64 - 4).unsigned_abs()
        );
        let ps = parse(&numstr, Radix::Hex).unwrap();
        assert!(!ps.is_inf());
        assert!(!ps.is_nan());
        let (m, _s, e) = ps.raw_parts();
        assert!(m.iter().all(|&x| x == 0));
        assert_eq!(e, 0);

        let numstr = format!(
            "0.0000abcdef09123e_e-{:X}",
            (EXPONENT_MIN as i64).unsigned_abs()
        );
        let ps = parse(&numstr, Radix::Hex).unwrap();
        assert!(!ps.is_inf());
        assert!(!ps.is_nan());
        let (m, _s, e) = ps.raw_parts();
        assert_eq!(
            m,
            [0, 0, 0, 0, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x0, 0x9, 0x1, 0x2, 0x3, 0xe]
        );
        assert_eq!(e, EXPONENT_MIN);

        let ps = parse(
            "abc.def09123e_e-ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
            Radix::Hex,
        )
        .unwrap();
        assert!(!ps.is_inf());
        assert!(!ps.is_nan());
        let (m, _s, e) = ps.raw_parts();
        assert!(m.iter().all(|&x| x == 0));
        assert_eq!(e, 0);
    }
}