nash_parse/number.rs
1//! Number parsing for Nash.
2//!
3//! Ported from Elm's `Parse/Number.hs`.
4//! Currently only supports integers (no floats).
5
6use crate::error;
7use crate::{Col, Parser, Row};
8
9impl<'a> Parser<'a> {
10 /// Parse an integer literal with custom error constructors.
11 ///
12 /// Mirrors Elm's `Number.number`:
13 /// ```haskell
14 /// number :: (Row -> Col -> x) -> (E.Number -> Row -> Col -> x) -> Parser x Number
15 /// ```
16 ///
17 /// Takes two error constructors:
18 /// - `to_expectation`: called when no digit is found (empty error, no input consumed)
19 /// - `to_error`: called when parsing fails after consuming input
20 ///
21 /// Handles:
22 /// - Decimal integers: `42`, `123`
23 /// - Hex integers: `0xFF`, `0x1A2B`
24 ///
25 /// # Example
26 /// ```ignore
27 /// // From expression parsing:
28 /// self.number_literal(error::Expr::Start, error::Expr::Number)
29 /// ```
30 pub fn number_literal<E>(
31 &mut self,
32 to_expectation: impl FnOnce(Row, Col) -> E,
33 to_error: impl FnOnce(error::Number, Row, Col) -> E,
34 ) -> Result<i128, E> {
35 let (row, col) = self.position();
36
37 // Check first - if not a digit, return expectation error WITHOUT consuming
38 let first = match self.peek() {
39 Some(b) if b.is_ascii_digit() => b,
40 _ => return Err(to_expectation(row, col)),
41 };
42
43 // Now we're committed - consume the first digit
44 self.advance();
45
46 let result = if first == b'0' {
47 self.chomp_zero()
48 } else {
49 self.chomp_int((first - b'0') as i128)
50 };
51
52 result.map_err(|e| to_error(e, self.row(), self.col()))
53 }
54
55 /// Continue parsing after seeing a leading '0'.
56 fn chomp_zero(&mut self) -> Result<i128, error::Number> {
57 match self.peek() {
58 None => Ok(0),
59
60 Some(b'x') | Some(b'X') => {
61 self.advance();
62 self.chomp_hex()
63 }
64
65 Some(b) if b.is_ascii_digit() => {
66 // Leading zeros not allowed: 007, 00, etc.
67 Err(error::Number::NoLeadingZero)
68 }
69
70 Some(b) if is_ident_inner(b) => {
71 // 0abc - dirty end
72 Err(error::Number::End)
73 }
74
75 Some(_) => Ok(0),
76 }
77 }
78
79 /// Parse remaining decimal digits after the first non-zero digit.
80 fn chomp_int(&mut self, mut n: i128) -> Result<i128, error::Number> {
81 loop {
82 match self.peek() {
83 Some(b) if b.is_ascii_digit() => {
84 n = n * 10 + (b - b'0') as i128;
85 self.advance();
86 }
87
88 Some(b) if is_ident_inner(b) => {
89 // 123abc - dirty end
90 return Err(error::Number::End);
91 }
92
93 _ => return Ok(n),
94 }
95 }
96 }
97
98 /// Parse hex digits after `0x`.
99 fn chomp_hex(&mut self) -> Result<i128, error::Number> {
100 let mut n: i128 = 0;
101 let mut has_digits = false;
102
103 loop {
104 match self.peek() {
105 Some(b) if b.is_ascii_hexdigit() => {
106 has_digits = true;
107 n = n * 16 + hex_value(b) as i128;
108 self.advance();
109 }
110
111 Some(b) if is_ident_inner(b) => {
112 // 0xGG or 0x1G - invalid hex followed by ident char
113 return Err(error::Number::HexDigit);
114 }
115
116 _ => {
117 if has_digits {
118 return Ok(n);
119 } else {
120 // 0x without any hex digits
121 return Err(error::Number::HexDigit);
122 }
123 }
124 }
125 }
126 }
127}
128
/// Report whether `b` is an ASCII letter or an underscore.
///
/// Number parsing uses this to spot a "dirty end" such as `123abc`, where
/// an identifier-like byte directly follows the digits. ASCII digits are
/// deliberately not matched here.
#[inline]
fn is_ident_inner(b: u8) -> bool {
    matches!(b, b'a'..=b'z' | b'A'..=b'Z' | b'_')
}
135
/// Convert an ASCII hex digit (`0-9`, `a-f`, `A-F`) to its value `0..=15`.
///
/// # Panics
/// Panics via `unreachable!()` if `b` is not a hex digit; callers are
/// expected to check `is_ascii_hexdigit` first.
#[inline]
fn hex_value(b: u8) -> u8 {
    match char::from(b).to_digit(16) {
        // `to_digit(16)` yields at most 15, so the narrowing is lossless.
        Some(digit) => digit as u8,
        None => unreachable!(),
    }
}
145}