leo_parser_lossless/
lib.rs

1// Copyright (C) 2019-2025 Provable Inc.
2// This file is part of the Leo library.
3
4// The Leo library is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8
9// The Leo library is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13
14// You should have received a copy of the GNU General Public License
15// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
16
17//! The lossless syntax tree and parser for Leo.
18
19use itertools::Itertools as _;
20use lalrpop_util::lalrpop_mod;
21
22use leo_errors::{Handler, LeoError, ParserError, Result};
23use leo_span::Span;
24
25// Comment me during release.
26//lalrpop_mod!(pub grammar);
27// Uncomment me during release and be sure to generate and copy `grammar.rs` into `src/`.
28pub mod grammar;
29
30pub mod tokens;
31
32use tokens::*;
33
34/// A tag indicating the nature of a syntax node.
35#[derive(Clone, Copy, Debug, PartialEq, Eq)]
36pub enum SyntaxKind {
37    Whitespace,
38    Linebreak,
39    CommentLine,
40    CommentBlock,
41
42    Expression(ExpressionKind),
43    StructMemberInitializer,
44
45    Statement(StatementKind),
46    Type(TypeKind),
47    Token,
48
49    Annotation,
50    AnnotationMember,
51    AnnotationList,
52
53    Parameter,
54    ParameterList,
55    FunctionOutput,
56    FunctionOutputs,
57    Function,
58    Constructor,
59
60    ConstParameter,
61    ConstParameterList,
62    ConstArgumentList,
63
64    StructDeclaration,
65    StructMemberDeclaration,
66    StructMemberDeclarationList,
67
68    Mapping,
69    Storage,
70
71    GlobalConst,
72
73    Import,
74    MainContents,
75    ModuleContents,
76    ProgramDeclaration,
77}
78
/// The integer type of a suffixed integer literal.
///
/// Wrapped by [`LiteralKind::Integer`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IntegerLiteralKind {
    // Unsigned.
    U8,
    U16,
    U32,
    U64,
    U128,

    // Signed.
    I8,
    I16,
    I32,
    I64,
    I128,
}
93
/// The specific integer type of an integer type node.
///
/// Wrapped by [`TypeKind::Integer`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IntegerTypeKind {
    // Unsigned.
    U8,
    U16,
    U32,
    U64,
    U128,

    // Signed.
    I8,
    I16,
    I32,
    I64,
    I128,
}
108
109#[derive(Clone, Copy, Debug, PartialEq, Eq)]
110pub enum TypeKind {
111    Address,
112    Array,
113    Boolean,
114    Composite,
115    Field,
116    Future,
117    Group,
118    Identifier,
119    Integer(IntegerTypeKind),
120    Mapping,
121    Optional,
122    Scalar,
123    Signature,
124    String,
125    Tuple,
126    Vector,
127    Numeric,
128    Unit,
129}
130
131impl From<TypeKind> for SyntaxKind {
132    fn from(value: TypeKind) -> Self {
133        SyntaxKind::Type(value)
134    }
135}
136
137impl From<IntegerTypeKind> for TypeKind {
138    fn from(value: IntegerTypeKind) -> Self {
139        TypeKind::Integer(value)
140    }
141}
142
143impl From<IntegerTypeKind> for SyntaxKind {
144    fn from(value: IntegerTypeKind) -> Self {
145        SyntaxKind::Type(TypeKind::Integer(value))
146    }
147}
148
149#[derive(Clone, Copy, Debug, PartialEq, Eq)]
150pub enum ExpressionKind {
151    ArrayAccess,
152    AssociatedConstant,
153    AssociatedFunctionCall,
154    Async,
155    Array,
156    Binary,
157    Call,
158    Cast,
159    Path,
160    Literal(LiteralKind),
161    Locator,
162    MemberAccess,
163    MethodCall,
164    Parenthesized,
165    Repeat,
166    // program.id, block.height, etc
167    SpecialAccess,
168    Struct,
169    Ternary,
170    Tuple,
171    TupleAccess,
172    Unary,
173    Unit,
174}
175
176#[derive(Clone, Copy, Debug, PartialEq, Eq)]
177pub enum LiteralKind {
178    Address,
179    Boolean,
180    Field,
181    Group,
182    Integer(IntegerLiteralKind),
183    None,
184    Scalar,
185    Unsuffixed,
186    String,
187}
188
189impl From<ExpressionKind> for SyntaxKind {
190    fn from(value: ExpressionKind) -> Self {
191        SyntaxKind::Expression(value)
192    }
193}
194
195impl From<LiteralKind> for ExpressionKind {
196    fn from(value: LiteralKind) -> Self {
197        ExpressionKind::Literal(value)
198    }
199}
200
201impl From<LiteralKind> for SyntaxKind {
202    fn from(value: LiteralKind) -> Self {
203        SyntaxKind::Expression(ExpressionKind::Literal(value))
204    }
205}
206
207impl From<IntegerLiteralKind> for LiteralKind {
208    fn from(value: IntegerLiteralKind) -> Self {
209        LiteralKind::Integer(value)
210    }
211}
212
213impl From<IntegerLiteralKind> for ExpressionKind {
214    fn from(value: IntegerLiteralKind) -> Self {
215        ExpressionKind::Literal(LiteralKind::Integer(value))
216    }
217}
218
219impl From<IntegerLiteralKind> for SyntaxKind {
220    fn from(value: IntegerLiteralKind) -> Self {
221        SyntaxKind::Expression(ExpressionKind::Literal(LiteralKind::Integer(value)))
222    }
223}
224
/// The specific kind of a statement node; wrapped by [`SyntaxKind::Statement`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StatementKind {
    Assert,
    AssertEq,
    AssertNeq,
    Assign,
    Block,
    Conditional,
    Const,
    Definition,
    Expression,
    Iteration,
    Return,
}
239
240impl From<StatementKind> for SyntaxKind {
241    fn from(value: StatementKind) -> Self {
242        SyntaxKind::Statement(value)
243    }
244}
245
246/// An untyped node in the lossless syntax tree.
247#[derive(Debug, Clone)]
248pub struct SyntaxNode<'a> {
249    /// A tag indicating the nature of the node.
250    pub kind: SyntaxKind,
251    /// The text from the source if applicable.
252    pub text: &'a str,
253    pub span: leo_span::Span,
254    pub children: Vec<SyntaxNode<'a>>,
255}
256
257impl<'a> SyntaxNode<'a> {
258    fn new_token(kind: SyntaxKind, token: LalrToken<'a>, children: Vec<Self>) -> Self {
259        Self { kind, text: token.text, span: token.span, children }
260    }
261
262    fn new(kind: impl Into<SyntaxKind>, children: impl IntoIterator<Item = Self>) -> Self {
263        let children: Vec<Self> = children.into_iter().collect();
264        let lo = children.first().unwrap().span.lo;
265        let hi = children.last().unwrap().span.hi;
266        let span = leo_span::Span { lo, hi };
267        Self { kind: kind.into(), text: "", span, children }
268    }
269
270    fn suffixed_literal(integer: LalrToken<'a>, suffix: LalrToken<'a>, children: Vec<Self>) -> Self {
271        let kind: SyntaxKind = match suffix.token {
272            Token::Field => LiteralKind::Field.into(),
273            Token::Group => LiteralKind::Group.into(),
274            Token::Scalar => LiteralKind::Scalar.into(),
275            Token::I8 => IntegerLiteralKind::I8.into(),
276            Token::I16 => IntegerLiteralKind::I16.into(),
277            Token::I32 => IntegerLiteralKind::I32.into(),
278            Token::I64 => IntegerLiteralKind::I64.into(),
279            Token::I128 => IntegerLiteralKind::I128.into(),
280            Token::U8 => IntegerLiteralKind::U8.into(),
281            Token::U16 => IntegerLiteralKind::U16.into(),
282            Token::U32 => IntegerLiteralKind::U32.into(),
283            Token::U64 => IntegerLiteralKind::U64.into(),
284            Token::U128 => IntegerLiteralKind::U128.into(),
285            x => panic!("Error in grammar.lalrpop: {x:?}"),
286        };
287
288        let lo = integer.span.lo;
289        let hi = suffix.span.hi;
290        let span = leo_span::Span { lo, hi };
291
292        Self { kind, text: integer.text, span, children }
293    }
294
295    fn binary_expression(lhs: Self, op: Self, rhs: Self) -> Self {
296        let span = leo_span::Span { lo: lhs.span.lo, hi: rhs.span.hi };
297        let children = vec![lhs, op, rhs];
298        SyntaxNode { kind: ExpressionKind::Binary.into(), text: "", span, children }
299    }
300}
301
/// Splits `text` on `::` and returns its first two components.
///
/// Returns `None` when `text` contains no `::` separator. Any components
/// after the second are ignored.
fn two_path_components(text: &str) -> Option<(&str, &str)> {
    let mut components = text.split("::");
    let head = components.next()?;
    let next = components.next()?;
    Some((head, next))
}
310
311pub fn parse_expression<'a>(handler: Handler, source: &'a str, start_pos: u32) -> Result<SyntaxNode<'a>> {
312    let parser = grammar::ExprParser::new();
313    parse_general(handler.clone(), source, start_pos, |lexer| parser.parse(&handler, lexer))
314}
315
316pub fn parse_statement<'a>(handler: Handler, source: &'a str, start_pos: u32) -> Result<SyntaxNode<'a>> {
317    let parser = grammar::StatementParser::new();
318    parse_general(handler.clone(), source, start_pos, |lexer| parser.parse(&handler, lexer))
319}
320
321pub fn parse_module<'a>(handler: Handler, source: &'a str, start_pos: u32) -> Result<SyntaxNode<'a>> {
322    let parser = grammar::ModuleContentsParser::new();
323    parse_general(handler.clone(), source, start_pos, |lexer| parser.parse(&handler, lexer))
324}
325
326pub fn parse_main<'a>(handler: Handler, source: &'a str, start_pos: u32) -> Result<SyntaxNode<'a>> {
327    let parser = grammar::MainContentsParser::new();
328    parse_general(handler.clone(), source, start_pos, |lexer| parser.parse(&handler, lexer))
329}
330
331fn check_identifier(token: &LalrToken<'_>, handler: &Handler) {
332    const MAX_IDENTIFIER_LEN: usize = 31usize;
333    if token.token == Token::IdVariants(IdVariants::Identifier) {
334        if token.text.len() > MAX_IDENTIFIER_LEN {
335            handler.emit_err(leo_errors::ParserError::identifier_too_long(
336                token.text,
337                token.text.len(),
338                MAX_IDENTIFIER_LEN,
339                token.span,
340            ));
341        }
342        // These are reserved for compiler-generated names.
343        if token.text.contains("__") {
344            handler.emit_err(ParserError::identifier_cannot_contain_double_underscore(token.text, token.span));
345        }
346    }
347}
348
349fn parse_general<'a>(
350    handler: Handler,
351    source: &'a str,
352    start_pos: u32,
353    parse: impl FnOnce(
354        &mut Lexer<'a>,
355    ) -> Result<SyntaxNode<'a>, lalrpop_util::ParseError<usize, LalrToken<'a>, &'static str>>,
356) -> Result<SyntaxNode<'a>> {
357    let mut lexer = tokens::Lexer::new(source, start_pos, handler.clone());
358    match parse(&mut lexer) {
359        Ok(val) => {
360            handler.last_err()?;
361            Ok(val)
362        }
363        Err(e) => {
364            if matches!(e, lalrpop_util::ParseError::UnrecognizedEof { .. }) {
365                // We don't want to redundantly report the EOF error, when the meaningfull
366                // errors are recorded in the handler.
367                handler.last_err()?;
368            }
369            Err(convert(e, source, start_pos))
370        }
371    }
372}
373
374// We can't implement From<lalrpop_util::ParseError> since both that
375// trait and leo_errors::Error are defined in other crates.
376fn convert(
377    error: lalrpop_util::ParseError<usize, LalrToken<'_>, &'static str>,
378    source: &str,
379    start_pos: u32,
380) -> LeoError {
381    match error {
382        lalrpop_util::ParseError::UnrecognizedToken { token, expected } => {
383            let expected = expected.iter().flat_map(|s| tokens::Token::str_user(s)).format(", ");
384            ParserError::unexpected(token.1.text, expected, token.1.span).into()
385        }
386        lalrpop_util::ParseError::UnrecognizedEof { location, .. } => {
387            let (lo, hi) = if source.is_empty() {
388                (start_pos, start_pos)
389            } else if location >= source.len() + start_pos as usize {
390                // Generally lalrpop reports the `location` for this error as
391                // one character past the end of the source. So let's
392                // back up one character.
393                // Can't just subtract 1 as we may not be on a character boundary.
394                let lo = source.char_indices().last().unwrap().0 as u32 + start_pos;
395                (lo, lo + 1)
396            } else {
397                (location as u32, location as u32 + 1)
398            };
399            ParserError::unexpected_eof(Span { lo, hi }).into()
400        }
401        x => panic!("ERR: {x:?}"),
402    }
403}