Skip to main content

nash_parse/
number.rs

1//! Number parsing for Nash.
2//!
3//! Ported from Elm's `Parse/Number.hs`.
4//! Currently only supports integers (no floats).
5
6use crate::error;
7use crate::{Col, Parser, Row};
8
9impl<'a> Parser<'a> {
10    /// Parse an integer literal with custom error constructors.
11    ///
12    /// Mirrors Elm's `Number.number`:
13    /// ```haskell
14    /// number :: (Row -> Col -> x) -> (E.Number -> Row -> Col -> x) -> Parser x Number
15    /// ```
16    ///
17    /// Takes two error constructors:
18    /// - `to_expectation`: called when no digit is found (empty error, no input consumed)
19    /// - `to_error`: called when parsing fails after consuming input
20    ///
21    /// Handles:
22    /// - Decimal integers: `42`, `123`
23    /// - Hex integers: `0xFF`, `0x1A2B`
24    ///
25    /// # Example
26    /// ```ignore
27    /// // From expression parsing:
28    /// self.number_literal(error::Expr::Start, error::Expr::Number)
29    /// ```
30    pub fn number_literal<E>(
31        &mut self,
32        to_expectation: impl FnOnce(Row, Col) -> E,
33        to_error: impl FnOnce(error::Number, Row, Col) -> E,
34    ) -> Result<i128, E> {
35        let (row, col) = self.position();
36
37        // Check first - if not a digit, return expectation error WITHOUT consuming
38        let first = match self.peek() {
39            Some(b) if b.is_ascii_digit() => b,
40            _ => return Err(to_expectation(row, col)),
41        };
42
43        // Now we're committed - consume the first digit
44        self.advance();
45
46        let result = if first == b'0' {
47            self.chomp_zero()
48        } else {
49            self.chomp_int((first - b'0') as i128)
50        };
51
52        result.map_err(|e| to_error(e, self.row(), self.col()))
53    }
54
55    /// Continue parsing after seeing a leading '0'.
56    fn chomp_zero(&mut self) -> Result<i128, error::Number> {
57        match self.peek() {
58            None => Ok(0),
59
60            Some(b'x') | Some(b'X') => {
61                self.advance();
62                self.chomp_hex()
63            }
64
65            Some(b) if b.is_ascii_digit() => {
66                // Leading zeros not allowed: 007, 00, etc.
67                Err(error::Number::NoLeadingZero)
68            }
69
70            Some(b) if is_ident_inner(b) => {
71                // 0abc - dirty end
72                Err(error::Number::End)
73            }
74
75            Some(_) => Ok(0),
76        }
77    }
78
79    /// Parse remaining decimal digits after the first non-zero digit.
80    fn chomp_int(&mut self, mut n: i128) -> Result<i128, error::Number> {
81        loop {
82            match self.peek() {
83                Some(b) if b.is_ascii_digit() => {
84                    n = n * 10 + (b - b'0') as i128;
85                    self.advance();
86                }
87
88                Some(b) if is_ident_inner(b) => {
89                    // 123abc - dirty end
90                    return Err(error::Number::End);
91                }
92
93                _ => return Ok(n),
94            }
95        }
96    }
97
98    /// Parse hex digits after `0x`.
99    fn chomp_hex(&mut self) -> Result<i128, error::Number> {
100        let mut n: i128 = 0;
101        let mut has_digits = false;
102
103        loop {
104            match self.peek() {
105                Some(b) if b.is_ascii_hexdigit() => {
106                    has_digits = true;
107                    n = n * 16 + hex_value(b) as i128;
108                    self.advance();
109                }
110
111                Some(b) if is_ident_inner(b) => {
112                    // 0xGG or 0x1G - invalid hex followed by ident char
113                    return Err(error::Number::HexDigit);
114                }
115
116                _ => {
117                    if has_digits {
118                        return Ok(n);
119                    } else {
120                        // 0x without any hex digits
121                        return Err(error::Number::HexDigit);
122                    }
123                }
124            }
125        }
126    }
127}
128
/// Report whether `b` is an ASCII letter or an underscore.
///
/// Number parsing uses this to spot a "dirty end", i.e. a literal that runs
/// straight into identifier text such as `123abc`. ASCII digits are not
/// classified here.
#[inline]
fn is_ident_inner(b: u8) -> bool {
    matches!(b, b'_' | b'a'..=b'z' | b'A'..=b'Z')
}
135
/// Convert an ASCII hex digit (`0-9`, `a-f`, `A-F`) to its value in `0..=15`.
///
/// # Panics
/// Panics via `unreachable!()` when `b` is not a hex digit; callers are
/// expected to have checked the byte beforehand.
#[inline]
fn hex_value(b: u8) -> u8 {
    match char::from(b).to_digit(16) {
        // The digit value is at most 15, so narrowing to `u8` cannot truncate.
        Some(digit) => digit as u8,
        None => unreachable!(),
    }
}