solar_parse/parser/
lit.rs

1use crate::{unescape, PResult, Parser};
2use alloy_primitives::Address;
3use num_bigint::BigInt;
4use num_rational::BigRational;
5use num_traits::Num;
6use solar_ast::{token::*, *};
7use solar_interface::{diagnostics::ErrorGuaranteed, kw, Symbol};
8use std::{borrow::Cow, fmt};
9
10impl<'sess, 'ast> Parser<'sess, 'ast> {
11    /// Parses a literal.
12    #[instrument(level = "debug", skip_all)]
13    pub fn parse_lit(&mut self) -> PResult<'sess, &'ast mut Lit> {
14        self.parse_spanned(Self::parse_lit_inner)
15            .map(|(span, (symbol, kind))| self.arena.literals.alloc(Lit { span, symbol, kind }))
16    }
17
18    /// Parses a literal with an optional subdenomination.
19    ///
20    /// Note that the subdenomination gets applied to the literal directly, and is returned just for
21    /// display reasons.
22    ///
23    /// Returns None if no subdenomination was parsed or if the literal is not a number or rational.
24    pub fn parse_lit_with_subdenomination(
25        &mut self,
26    ) -> PResult<'sess, (&'ast mut Lit, Option<SubDenomination>)> {
27        let lit = self.parse_lit()?;
28        let mut sub = self.parse_subdenomination();
29        if let opt @ Some(_) = &mut sub {
30            let Some(sub) = opt else { unreachable!() };
31            match &mut lit.kind {
32                LitKind::Number(n) => *n *= sub.value(),
33                l @ LitKind::Rational(_) => {
34                    let LitKind::Rational(n) = l else { unreachable!() };
35                    *n *= BigInt::from(sub.value());
36                    if n.is_integer() {
37                        *l = LitKind::Number(n.to_integer());
38                    }
39                }
40                _ => {
41                    *opt = None;
42                    let msg = "sub-denominations are only allowed on number and rational literals";
43                    self.dcx().err(msg).span(lit.span.to(self.prev_token.span)).emit();
44                }
45            }
46        }
47        Ok((lit, sub))
48    }
49
50    /// Parses a subdenomination.
51    pub fn parse_subdenomination(&mut self) -> Option<SubDenomination> {
52        let sub = self.subdenomination();
53        if sub.is_some() {
54            self.bump();
55        }
56        sub
57    }
58
59    fn subdenomination(&self) -> Option<SubDenomination> {
60        match self.token.ident()?.name {
61            kw::Wei => Some(SubDenomination::Ether(EtherSubDenomination::Wei)),
62            kw::Gwei => Some(SubDenomination::Ether(EtherSubDenomination::Gwei)),
63            kw::Ether => Some(SubDenomination::Ether(EtherSubDenomination::Ether)),
64
65            kw::Seconds => Some(SubDenomination::Time(TimeSubDenomination::Seconds)),
66            kw::Minutes => Some(SubDenomination::Time(TimeSubDenomination::Minutes)),
67            kw::Hours => Some(SubDenomination::Time(TimeSubDenomination::Hours)),
68            kw::Days => Some(SubDenomination::Time(TimeSubDenomination::Days)),
69            kw::Weeks => Some(SubDenomination::Time(TimeSubDenomination::Weeks)),
70            kw::Years => Some(SubDenomination::Time(TimeSubDenomination::Years)),
71
72            _ => None,
73        }
74    }
75
76    /// Emits an error if a subdenomination was parsed.
77    pub(super) fn expect_no_subdenomination(&mut self) {
78        if let Some(_sub) = self.parse_subdenomination() {
79            let span = self.prev_token.span;
80            self.dcx().err("subdenominations aren't allowed here").span(span).emit();
81        }
82    }
83
84    fn parse_lit_inner(&mut self) -> PResult<'sess, (Symbol, LitKind)> {
85        if let TokenKind::Ident(symbol @ (kw::True | kw::False)) = self.token.kind {
86            self.bump();
87            return Ok((symbol, LitKind::Bool(symbol != kw::False)));
88        }
89
90        if !self.check_lit() {
91            return self.unexpected();
92        }
93
94        let Some(lit) = self.token.lit() else {
95            unreachable!("check_lit() returned true for non-literal token");
96        };
97        self.bump();
98        let kind = match lit.kind {
99            TokenLitKind::Integer => self.parse_lit_int(lit.symbol),
100            TokenLitKind::Rational => self.parse_lit_rational(lit.symbol),
101            TokenLitKind::Str | TokenLitKind::UnicodeStr | TokenLitKind::HexStr => {
102                self.parse_lit_str(lit)
103            }
104            TokenLitKind::Err(guar) => Ok(LitKind::Err(guar)),
105        };
106        kind.map(|kind| (lit.symbol, kind))
107    }
108
109    /// Parses an integer literal.
110    fn parse_lit_int(&mut self, symbol: Symbol) -> PResult<'sess, LitKind> {
111        use LitError::*;
112        match parse_integer(symbol) {
113            Ok(l) => Ok(l),
114            // User error.
115            Err(e @ IntegerLeadingZeros) => Err(self.dcx().err(e.to_string())),
116            // User error, but already emitted.
117            Err(EmptyInteger) => Ok(LitKind::Err(ErrorGuaranteed::new_unchecked())),
118            // Lexer internal error.
119            Err(e @ ParseInteger(_)) => panic!("failed to parse integer literal {symbol:?}: {e}"),
120            // Should never happen.
121            Err(
122                e @ (EmptyRational | EmptyExponent | ParseRational(_) | ParseExponent(_)
123                | RationalTooLarge | ExponentTooLarge),
124            ) => panic!("this error shouldn't happen for normal integer literals: {e}"),
125        }
126    }
127
128    /// Parses a rational literal.
129    fn parse_lit_rational(&mut self, symbol: Symbol) -> PResult<'sess, LitKind> {
130        use LitError::*;
131        match parse_rational(symbol) {
132            Ok(l) => Ok(l),
133            // User error.
134            Err(
135                e @ (EmptyRational | RationalTooLarge | ExponentTooLarge | IntegerLeadingZeros),
136            ) => Err(self.dcx().err(e.to_string())),
137            // User error, but already emitted.
138            Err(EmptyExponent) => Ok(LitKind::Err(ErrorGuaranteed::new_unchecked())),
139            // Lexer internal error.
140            Err(e @ (ParseExponent(_) | ParseInteger(_) | ParseRational(_) | EmptyInteger)) => {
141                panic!("failed to parse rational literal {symbol:?}: {e}")
142            }
143        }
144    }
145
146    /// Parses a string literal.
147    fn parse_lit_str(&mut self, lit: TokenLit) -> PResult<'sess, LitKind> {
148        let mode = match lit.kind {
149            TokenLitKind::Str => unescape::Mode::Str,
150            TokenLitKind::UnicodeStr => unescape::Mode::UnicodeStr,
151            TokenLitKind::HexStr => unescape::Mode::HexStr,
152            _ => unreachable!(),
153        };
154
155        let mut value = unescape::parse_string_literal(lit.symbol.as_str(), mode);
156        while let Some(TokenLit { symbol, kind }) = self.token.lit() {
157            if kind != lit.kind {
158                break;
159            }
160            value
161                .to_mut()
162                .extend_from_slice(&unescape::parse_string_literal(symbol.as_str(), mode));
163            self.bump();
164        }
165
166        let kind = match lit.kind {
167            TokenLitKind::Str => StrKind::Str,
168            TokenLitKind::UnicodeStr => StrKind::Unicode,
169            TokenLitKind::HexStr => StrKind::Hex,
170            _ => unreachable!(),
171        };
172        Ok(LitKind::Str(kind, value.into()))
173    }
174}
175
/// Errors produced by `parse_integer` and `parse_rational` while turning the text of a numeric
/// literal into a `LitKind`.
#[derive(Debug, PartialEq, Eq)]
enum LitError {
    /// No digits remain in an integer literal once its base prefix (if any) is removed.
    EmptyInteger,
    /// A `.` is present but the fractional part following it is empty.
    EmptyRational,
    /// An `e`/`E` is present but the exponent following it is empty.
    EmptyExponent,

    /// The integer digits could not be parsed as a `BigInt`.
    ParseInteger(num_bigint::ParseBigIntError),
    /// The combined integer and fractional digits could not be parsed as a `BigInt`.
    ParseRational(num_bigint::ParseBigIntError),
    /// The exponent digits could not be parsed as a `BigInt`.
    ParseExponent(num_bigint::ParseBigIntError),

    /// The number of fractional digits (after trimming trailing zeros) does not fit in a `u16`.
    RationalTooLarge,
    /// The exponent does not fit in an `i16`.
    ExponentTooLarge,
    /// A decimal integer part has more than one digit and starts with `0`.
    IntegerLeadingZeros,
}
190
191impl fmt::Display for LitError {
192    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
193        match self {
194            Self::EmptyInteger => write!(f, "empty integer"),
195            Self::EmptyRational => write!(f, "empty rational"),
196            Self::EmptyExponent => write!(f, "empty exponent"),
197            Self::ParseInteger(e) => write!(f, "failed to parse integer: {e}"),
198            Self::ParseRational(e) => write!(f, "failed to parse rational: {e}"),
199            Self::ParseExponent(e) => write!(f, "failed to parse exponent: {e}"),
200            Self::RationalTooLarge => write!(f, "rational part too large"),
201            Self::ExponentTooLarge => write!(f, "exponent too large"),
202            Self::IntegerLeadingZeros => write!(f, "leading zeros are not allowed in integers"),
203        }
204    }
205}
206
207fn parse_integer(symbol: Symbol) -> Result<LitKind, LitError> {
208    /// Primitive type to use for fast-path parsing.
209    type Primitive = u128;
210
211    const fn max_len(base: u32) -> u32 {
212        Primitive::MAX.ilog(base as Primitive) + 1
213    }
214
215    let s = &strip_underscores(&symbol)[..];
216    let (base, fast_path_len) = match s.as_bytes() {
217        [b'0', b'x', ..] => (16, const { max_len(16) }),
218        [b'0', b'o', ..] => (8, const { max_len(8) }),
219        [b'0', b'b', ..] => (2, const { max_len(2) }),
220        _ => (10, const { max_len(10) }),
221    };
222
223    if base == 10 && s.starts_with('0') && s.len() > 1 {
224        return Err(LitError::IntegerLeadingZeros);
225    }
226
227    // Address literal.
228    if base == 16 && s.len() == 42 {
229        match Address::parse_checksummed(s, None) {
230            Ok(address) => return Ok(LitKind::Address(address)),
231            // Continue parsing as a number to emit better errors.
232            Err(alloy_primitives::AddressError::InvalidChecksum) => {}
233            Err(alloy_primitives::AddressError::Hex(_)) => {}
234        }
235    }
236
237    let start = if base == 10 { 0 } else { 2 };
238    let s = &s[start..];
239    if s.is_empty() {
240        return Err(LitError::EmptyInteger);
241    }
242    if s.len() <= fast_path_len as usize {
243        if let Ok(n) = Primitive::from_str_radix(s, base) {
244            return Ok(LitKind::Number(BigInt::from(n)));
245        }
246    }
247    BigInt::from_str_radix(s, base).map(LitKind::Number).map_err(LitError::ParseInteger)
248}
249
250fn parse_rational(symbol: Symbol) -> Result<LitKind, LitError> {
251    let s = &strip_underscores(&symbol)[..];
252    debug_assert!(!s.is_empty());
253
254    let (mut int, rat, exp) = match (s.find('.'), s.find(['e', 'E'])) {
255        // X
256        (None, None) => (s, None, None),
257        // X.Y
258        (Some(dot), None) => {
259            let (int, rat) = split_at_exclusive(s, dot);
260            (int, Some(rat), None)
261        }
262        // XeZ
263        (None, Some(exp)) => {
264            let (int, exp) = split_at_exclusive(s, exp);
265            (int, None, Some(exp))
266        }
267        // X.YeZ
268        (Some(dot), Some(exp)) => {
269            debug_assert!(exp > dot);
270            let (int, rest) = split_at_exclusive(s, dot);
271            let (rat, exp) = split_at_exclusive(rest, exp - dot - 1);
272            (int, Some(rat), Some(exp))
273        }
274    };
275
276    if cfg!(debug_assertions) {
277        let mut reconstructed = String::from(int);
278        if let Some(rat) = rat {
279            reconstructed.push('.');
280            reconstructed.push_str(rat);
281        }
282        if let Some(exp) = exp {
283            let e = if s.contains('E') { 'E' } else { 'e' };
284            reconstructed.push(e);
285            reconstructed.push_str(exp);
286        }
287        assert_eq!(reconstructed, s, "{int:?} + {rat:?} + {exp:?}");
288    }
289
290    // `int` is allowed to be empty: `.1e1` is the same as `0.1e1`.
291    if int.is_empty() {
292        int = "0";
293    }
294    if rat.is_some_and(str::is_empty) {
295        return Err(LitError::EmptyRational);
296    }
297    if exp.is_some_and(str::is_empty) {
298        return Err(LitError::EmptyExponent);
299    }
300
301    if int.starts_with('0') && int.len() > 1 {
302        return Err(LitError::IntegerLeadingZeros);
303    }
304    // NOTE: leading zeros are allowed in the rational and exponent parts.
305
306    let rat = rat.map(|rat| rat.trim_end_matches('0'));
307
308    let int = match rat {
309        Some(rat) => {
310            let s = [int, rat].concat();
311            BigInt::from_str_radix(&s, 10).map_err(LitError::ParseRational)
312        }
313        None => BigInt::from_str_radix(int, 10).map_err(LitError::ParseInteger),
314    }?;
315
316    let fract_len = rat.map_or(0, str::len);
317    let fract_len = u16::try_from(fract_len).map_err(|_| LitError::RationalTooLarge)?;
318    let denominator = BigInt::from(10u64).pow(fract_len as u32);
319    let mut number = BigRational::new(int, denominator);
320
321    if let Some(exp) = exp {
322        let exp = BigInt::from_str_radix(exp, 10).map_err(LitError::ParseExponent)?;
323        let exp = i16::try_from(exp).map_err(|_| LitError::ExponentTooLarge)?;
324        // NOTE: Calculating exponents greater than i16 might perform better with a manual loop.
325        let ten = BigInt::from(10u64);
326        if exp.is_negative() {
327            number /= ten.pow((-exp) as u32);
328        } else {
329            number *= ten.pow(exp as u32);
330        }
331    }
332
333    if number.is_integer() {
334        Ok(LitKind::Number(number.to_integer()))
335    } else {
336        Ok(LitKind::Rational(number))
337    }
338}
339
/// Splits `s` around the single-byte separator at byte index `idx`, returning the text before
/// and after it. The byte at `idx` itself is not part of either half.
///
/// # Panics
///
/// Panics if `idx` or `idx + 1` is not on a char boundary of `s`, which includes either of them
/// being past the end of the string.
#[track_caller]
fn split_at_exclusive(s: &str, idx: usize) -> (&str, &str) {
    // `str::get` yields `None` exactly when a range end is out of bounds or not on a char
    // boundary, so no `unsafe` is needed to get checked slicing.
    let head = s.get(..idx);
    let tail = idx.checked_add(1).and_then(|after| s.get(after..));
    match (head, tail) {
        (Some(head), Some(tail)) => (head, tail),
        _ => panic!("cannot split {s:?} around byte index {idx}"),
    }
}
347
348#[inline]
349fn strip_underscores(symbol: &Symbol) -> Cow<'_, str> {
350    // Do not allocate a new string unless necessary.
351    let s = symbol.as_str();
352    if s.contains('_') {
353        let mut s = s.to_string();
354        s.retain(|c| c != '_');
355        return Cow::Owned(s);
356    }
357    Cow::Borrowed(s)
358}
359
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Lexer;
    use alloy_primitives::address;
    use solar_interface::Session;

    // String literal parsing is tested in ../lexer/mod.rs.

    /// Lexes `src` and returns the symbol of its single literal token.
    ///
    /// Going through the lexer gives the helpers the same input that the parser gets. Panics if
    /// lexing reported errors, produced anything other than exactly one token, or the token is
    /// not a literal.
    #[track_caller]
    fn lex_literal(src: &str) -> Symbol {
        let sess = Session::builder().with_test_emitter().build();
        let tokens = Lexer::new(&sess, src).into_tokens();
        sess.dcx.has_errors().unwrap();
        assert_eq!(tokens.len(), 1, "expected exactly 1 token {tokens:?}");
        tokens[0].lit().expect("not a literal").symbol
    }

    #[test]
    fn integer() {
        use LitError::*;

        /// Asserts that `parse_integer` yields the decimal number `expected`, or the given error.
        #[track_caller]
        fn check_int(src: &str, expected: Result<&str, LitError>) {
            let symbol = lex_literal(src);
            let res = match parse_integer(symbol) {
                Ok(LitKind::Number(n)) => Ok(n),
                Ok(x) => panic!("not a number: {x:?} ({src:?})"),
                Err(e) => Err(e),
            };
            let expected = expected.map(|s| BigInt::from_str_radix(s, 10).unwrap());
            assert_eq!(res, expected, "{src:?}");
        }

        /// Asserts that `parse_integer` yields the address in `Ok`, or, for `Err`, falls back to
        /// the plain number written in decimal.
        #[track_caller]
        fn check_address(src: &str, expected: Result<Address, &str>) {
            let symbol = lex_literal(src);
            match expected {
                Ok(address) => match parse_integer(symbol) {
                    Ok(LitKind::Address(a)) => assert_eq!(a, address, "{src:?}"),
                    e => panic!("not an address: {e:?} ({src:?})"),
                },
                Err(int) => match parse_integer(symbol) {
                    Ok(LitKind::Number(n)) => {
                        assert_eq!(n, BigInt::from_str_radix(int, 10).unwrap(), "{src:?}")
                    }
                    e => panic!("not an integer: {e:?} ({src:?})"),
                },
            }
        }

        solar_interface::enter(|| {
            for src in ["00", "01", "001", "000", "0001"] {
                check_int(src, Err(IntegerLeadingZeros));
            }

            // NOTE(review): binary (`0b10`) and octal (`0o10`) inputs are not exercised here;
            // the previous cases were commented out, presumably because the lexer does not
            // produce such tokens — confirm before adding them back.
            for (src, expected) in [("0", "0"), ("1", "1"), ("10", "10"), ("0x10", "16")] {
                check_int(src, Ok(expected));
            }

            // Only hex literals of exactly 42 characters are treated as addresses.
            check_address("0x00000000000000000000000000000000000000", Err("0"));
            check_address("0x000000000000000000000000000000000000000", Err("0"));
            check_address("0x0000000000000000000000000000000000000000", Ok(Address::ZERO));
            check_address("0x00000000000000000000000000000000000000000", Err("0"));
            check_address("0x000000000000000000000000000000000000000000", Err("0"));
            check_address(
                "0x0000000000000000000000000000000000000001",
                Ok(Address::with_last_byte(1)),
            );

            check_address(
                "0x52908400098527886E0F7030069857D2E4169EE7",
                Ok(address!("52908400098527886E0F7030069857D2E4169EE7")),
            );
            // Same digits with a broken checksum: parsed as a plain number instead.
            check_address(
                "0x52908400098527886E0F7030069857D2E4169Ee7",
                Err("471360049350540672339372329809862569580528312039"),
            );

            check_address(
                "0x8617E340B3D01FA5F11F306F4090FD50E238070D",
                Ok(address!("8617E340B3D01FA5F11F306F4090FD50E238070D")),
            );
            check_address(
                "0xde709f2102306220921060314715629080e2fb77",
                Ok(address!("de709f2102306220921060314715629080e2fb77")),
            );
            check_address(
                "0x27b1fdb04752bbc536007a920d24acb045561c26",
                Ok(address!("27b1fdb04752bbc536007a920d24acb045561c26")),
            );
            check_address(
                "0x5aAeb6053F3E94C9b9A09f33669435E7Ef1BeAed",
                Ok(address!("5aAeb6053F3E94C9b9A09f33669435E7Ef1BeAed")),
            );
            check_address(
                "0xfB6916095ca1df60bB79Ce92cE3Ea74c37c5d359",
                Ok(address!("fB6916095ca1df60bB79Ce92cE3Ea74c37c5d359")),
            );
            check_address(
                "0xdbF03B407c01E7cD3CBea99509d93f8DDDC8C6FB",
                Ok(address!("dbF03B407c01E7cD3CBea99509d93f8DDDC8C6FB")),
            );
            check_address(
                "0xD1220A0cf47c7B9Be7A2E6BA89F429762e7b9aDb",
                Ok(address!("D1220A0cf47c7B9Be7A2E6BA89F429762e7b9aDb")),
            );
        });
    }

    #[test]
    fn rational() {
        use LitError::*;

        /// Asserts that `parse_rational` yields the integer `expected`, or the given error.
        #[track_caller]
        fn check_int(src: &str, expected: Result<&str, LitError>) {
            let symbol = lex_literal(src);
            let res = match parse_rational(symbol) {
                Ok(LitKind::Number(r)) => Ok(r),
                Ok(x) => panic!("not a number: {x:?} ({src:?})"),
                Err(e) => Err(e),
            };
            let expected = expected.map(|s| BigInt::from_str_radix(s, 10).unwrap());
            assert_eq!(res, expected, "{src:?}");
        }

        /// Asserts that `parse_rational` yields the non-integral fraction `expected` (written as
        /// `numerator/denominator`), or the given error.
        #[track_caller]
        fn check_rat(src: &str, expected: Result<&str, LitError>) {
            let symbol = lex_literal(src);
            let res = match parse_rational(symbol) {
                Ok(LitKind::Rational(r)) => Ok(r),
                Ok(x) => panic!("not a rational: {x:?} ({src:?})"),
                Err(e) => Err(e),
            };
            let expected = expected.map(|s| BigRational::from_str_radix(s, 10).unwrap());
            assert_eq!(res, expected, "{src:?}");
        }

        solar_interface::enter(|| {
            for src in ["00", "0_0", "01", "0_1", "001", "000", "0001"] {
                check_int(src, Err(IntegerLeadingZeros));
            }
            check_int("0e999999", Err(ExponentTooLarge));

            check_int("0.", Err(EmptyRational));

            // Every spelling of zero, with and without an explicit integer part.
            for src in [
                "0", "0e0", "0.0", "0.00", "0.0e0", "0.00e0", "0.0e00", "0.00e00", "0.0e-0",
                "0.00e-0", "0.0e-00", "0.00e-00", "0.0e1", "0.00e1", "0.00e01", ".0", ".00",
                ".0e0", ".00e0", ".0e00", ".00e00", ".0e-0", ".00e-0", ".0e-00", ".00e-00",
                ".0e1", ".00e1", ".00e01",
            ] {
                check_int(src, Ok("0"));
            }

            for src in [
                "1", "1e0", "1.0", "1.00", "1.0e0", "1.00e0", "1.0e00", "1.00e00", "1.0e-0",
                "1.00e-0", "1.0e-00", "1.00e-00",
            ] {
                check_int(src, Ok("1"));
            }

            for src in ["1e1", "1.0e1", "1.00e1", "1.00e01"] {
                check_int(src, Ok("10"));
            }

            // A positive exponent can turn a fractional mantissa into an integer.
            for (src, expected) in [
                ("1.1e1", "11"),
                ("1.10e1", "11"),
                ("1.100e1", "11"),
                ("1.2e1", "12"),
                ("1.200e1", "12"),
            ] {
                check_int(src, Ok(expected));
            }

            for (src, expected) in [
                ("1e-1", "1/10"),
                ("1e-2", "1/100"),
                ("1e-3", "1/1000"),
                ("1.0e-1", "1/10"),
                ("1.0e-2", "1/100"),
                ("1.0e-3", "1/1000"),
                ("1.1e-1", "11/100"),
                ("1.1e-2", "11/1000"),
                ("1.1e-3", "11/10000"),
                ("1.1", "11/10"),
                ("1.10", "11/10"),
                ("1.100", "11/10"),
                ("1.2", "12/10"),
                ("1.20", "12/10"),
            ] {
                check_rat(src, Ok(expected));
            }
        });
    }
}