Skip to main content

leo_parser_lossless/
lib.rs

1// Copyright (C) 2019-2026 Provable Inc.
2// This file is part of the Leo library.
3
4// The Leo library is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8
9// The Leo library is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13
14// You should have received a copy of the GNU General Public License
15// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
16
17//! The lossless syntax tree and parser for Leo.
18
19use itertools::Itertools as _;
20use leo_errors::{Handler, LeoError, ParserError, Result};
21use leo_span::Span;
22
23// Comment me when running `cargo publish`.
24// use lalrpop_util::lalrpop_mod;
25// lalrpop_mod!(pub grammar);
26// Uncomment me when running `cargo publish` and be sure to generate and copy `grammar.rs` from `target/` into `src/`.
27pub mod grammar;
28
29pub mod tokens;
30
31use tokens::*;
32
33/// A tag indicating the nature of a syntax node.
34#[derive(Clone, Copy, Debug, PartialEq, Eq)]
35pub enum SyntaxKind {
36    Whitespace,
37    Linebreak,
38    CommentLine,
39    CommentBlock,
40
41    Expression(ExpressionKind),
42    CompositeMemberInitializer,
43
44    Statement(StatementKind),
45    Type(TypeKind),
46    Token,
47
48    Annotation,
49    AnnotationMember,
50    AnnotationList,
51
52    Parameter,
53    ParameterList,
54    FunctionOutput,
55    FunctionOutputs,
56    Function,
57    Constructor,
58
59    ConstParameter,
60    ConstParameterList,
61    ConstArgumentList,
62
63    CompositeDeclaration,
64    CompositeMemberDeclaration,
65    CompositeMemberDeclarationList,
66
67    Mapping,
68    Storage,
69
70    GlobalConst,
71
72    Import,
73    MainContents,
74    ModuleContents,
75    ProgramDeclaration,
76}
77
/// The specific integer kind of an integer literal.
///
/// Carried by `LiteralKind::Integer`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IntegerLiteralKind {
    // `U*` kinds.
    U8,
    U16,
    U32,
    U64,
    U128,

    // `I*` kinds.
    I8,
    I16,
    I32,
    I64,
    I128,
}
92
/// The specific integer kind of an integer type.
///
/// Carried by `TypeKind::Integer`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IntegerTypeKind {
    // `U*` kinds.
    U8,
    U16,
    U32,
    U64,
    U128,

    // `I*` kinds.
    I8,
    I16,
    I32,
    I64,
    I128,
}
107
108#[derive(Clone, Copy, Debug, PartialEq, Eq)]
109pub enum TypeKind {
110    Address,
111    Array,
112    Boolean,
113    Composite,
114    Field,
115    Future,
116    Group,
117    Identifier,
118    Integer(IntegerTypeKind),
119    Mapping,
120    Optional,
121    Scalar,
122    Signature,
123    String,
124    Tuple,
125    Vector,
126    Numeric,
127    Unit,
128}
129
130impl From<TypeKind> for SyntaxKind {
131    fn from(value: TypeKind) -> Self {
132        SyntaxKind::Type(value)
133    }
134}
135
136impl From<IntegerTypeKind> for TypeKind {
137    fn from(value: IntegerTypeKind) -> Self {
138        TypeKind::Integer(value)
139    }
140}
141
142impl From<IntegerTypeKind> for SyntaxKind {
143    fn from(value: IntegerTypeKind) -> Self {
144        SyntaxKind::Type(TypeKind::Integer(value))
145    }
146}
147
148#[derive(Clone, Copy, Debug, PartialEq, Eq)]
149pub enum ExpressionKind {
150    ArrayAccess,
151    AssociatedConstant,
152    AssociatedFunctionCall,
153    Async,
154    Array,
155    Binary,
156    Call,
157    Cast,
158    Path,
159    Literal(LiteralKind),
160    MemberAccess,
161    MethodCall,
162    Parenthesized,
163    Repeat,
164    Intrinsic,
165    SpecialAccess, // TODO: fold into Intrinsic
166    Composite,
167    Ternary,
168    Tuple,
169    TupleAccess,
170    Unary,
171    Unit,
172}
173
174#[derive(Clone, Copy, Debug, PartialEq, Eq)]
175pub enum LiteralKind {
176    Address,
177    Boolean,
178    Field,
179    Group,
180    Integer(IntegerLiteralKind),
181    None,
182    Scalar,
183    Unsuffixed,
184    String,
185}
186
187impl From<ExpressionKind> for SyntaxKind {
188    fn from(value: ExpressionKind) -> Self {
189        SyntaxKind::Expression(value)
190    }
191}
192
193impl From<LiteralKind> for ExpressionKind {
194    fn from(value: LiteralKind) -> Self {
195        ExpressionKind::Literal(value)
196    }
197}
198
199impl From<LiteralKind> for SyntaxKind {
200    fn from(value: LiteralKind) -> Self {
201        SyntaxKind::Expression(ExpressionKind::Literal(value))
202    }
203}
204
205impl From<IntegerLiteralKind> for LiteralKind {
206    fn from(value: IntegerLiteralKind) -> Self {
207        LiteralKind::Integer(value)
208    }
209}
210
211impl From<IntegerLiteralKind> for ExpressionKind {
212    fn from(value: IntegerLiteralKind) -> Self {
213        ExpressionKind::Literal(LiteralKind::Integer(value))
214    }
215}
216
217impl From<IntegerLiteralKind> for SyntaxKind {
218    fn from(value: IntegerLiteralKind) -> Self {
219        SyntaxKind::Expression(ExpressionKind::Literal(LiteralKind::Integer(value)))
220    }
221}
222
/// A tag identifying which kind of statement a statement node represents.
///
/// Carried by `SyntaxKind::Statement`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StatementKind {
    // Assertions.
    Assert,
    AssertEq,
    AssertNeq,

    Assign,
    Block,
    Conditional,
    Const,
    Definition,
    Expression,
    Iteration,
    Return,
}
237
238impl From<StatementKind> for SyntaxKind {
239    fn from(value: StatementKind) -> Self {
240        SyntaxKind::Statement(value)
241    }
242}
243
244/// An untyped node in the lossless syntax tree.
245#[derive(Debug, Clone)]
246pub struct SyntaxNode<'a> {
247    /// A tag indicating the nature of the node.
248    pub kind: SyntaxKind,
249    /// The text from the source if applicable.
250    pub text: &'a str,
251    pub span: leo_span::Span,
252    pub children: Vec<SyntaxNode<'a>>,
253}
254
255impl<'a> SyntaxNode<'a> {
256    fn new_token(kind: SyntaxKind, token: LalrToken<'a>, children: Vec<Self>) -> Self {
257        Self { kind, text: token.text, span: token.span, children }
258    }
259
260    fn new(kind: impl Into<SyntaxKind>, children: impl IntoIterator<Item = Self>) -> Self {
261        let children: Vec<Self> = children.into_iter().collect();
262        let lo = children.first().unwrap().span.lo;
263        let hi = children.last().unwrap().span.hi;
264        let span = leo_span::Span { lo, hi };
265        Self { kind: kind.into(), text: "", span, children }
266    }
267
268    fn suffixed_literal(integer: LalrToken<'a>, suffix: LalrToken<'a>, children: Vec<Self>) -> Self {
269        let kind: SyntaxKind = match suffix.token {
270            Token::Field => LiteralKind::Field.into(),
271            Token::Group => LiteralKind::Group.into(),
272            Token::Scalar => LiteralKind::Scalar.into(),
273            Token::I8 => IntegerLiteralKind::I8.into(),
274            Token::I16 => IntegerLiteralKind::I16.into(),
275            Token::I32 => IntegerLiteralKind::I32.into(),
276            Token::I64 => IntegerLiteralKind::I64.into(),
277            Token::I128 => IntegerLiteralKind::I128.into(),
278            Token::U8 => IntegerLiteralKind::U8.into(),
279            Token::U16 => IntegerLiteralKind::U16.into(),
280            Token::U32 => IntegerLiteralKind::U32.into(),
281            Token::U64 => IntegerLiteralKind::U64.into(),
282            Token::U128 => IntegerLiteralKind::U128.into(),
283            x => panic!("Error in grammar.lalrpop: {x:?}"),
284        };
285
286        let lo = integer.span.lo;
287        let hi = suffix.span.hi;
288        let span = leo_span::Span { lo, hi };
289
290        Self { kind, text: integer.text, span, children }
291    }
292
293    fn binary_expression(lhs: Self, op: Self, rhs: Self) -> Self {
294        let span = leo_span::Span { lo: lhs.span.lo, hi: rhs.span.hi };
295        let children = vec![lhs, op, rhs];
296        SyntaxNode { kind: ExpressionKind::Binary.into(), text: "", span, children }
297    }
298}
299
/// Returns the first two `::`-separated components of `text`.
///
/// Yields `None` when `text` contains no `::` separator. Components after
/// the second one are ignored.
fn two_path_components(text: &str) -> Option<(&str, &str)> {
    let mut components = text.split("::");
    let first = components.next()?;
    let second = components.next()?;
    Some((first, second))
}
308
309pub fn parse_expression<'a>(handler: Handler, source: &'a str, start_pos: u32) -> Result<SyntaxNode<'a>> {
310    let parser = grammar::ExprParser::new();
311    parse_general(handler.clone(), source, start_pos, |lexer| parser.parse(&handler, lexer))
312}
313
314pub fn parse_statement<'a>(handler: Handler, source: &'a str, start_pos: u32) -> Result<SyntaxNode<'a>> {
315    let parser = grammar::StatementParser::new();
316    parse_general(handler.clone(), source, start_pos, |lexer| parser.parse(&handler, lexer))
317}
318
319pub fn parse_module<'a>(handler: Handler, source: &'a str, start_pos: u32) -> Result<SyntaxNode<'a>> {
320    let parser = grammar::ModuleContentsParser::new();
321    parse_general(handler.clone(), source, start_pos, |lexer| parser.parse(&handler, lexer))
322}
323
324pub fn parse_main<'a>(handler: Handler, source: &'a str, start_pos: u32) -> Result<SyntaxNode<'a>> {
325    let parser = grammar::MainContentsParser::new();
326    parse_general(handler.clone(), source, start_pos, |lexer| parser.parse(&handler, lexer))
327}
328
329fn check_identifier(token: &LalrToken<'_>, handler: &Handler) {
330    const MAX_IDENTIFIER_LEN: usize = 31usize;
331    if token.token == Token::IdVariants(IdVariants::Identifier) {
332        if token.text.len() > MAX_IDENTIFIER_LEN {
333            handler.emit_err(leo_errors::ParserError::identifier_too_long(
334                token.text,
335                token.text.len(),
336                MAX_IDENTIFIER_LEN,
337                token.span,
338            ));
339        }
340        // These are reserved for compiler-generated names.
341        if token.text.contains("__") {
342            handler.emit_err(ParserError::identifier_cannot_contain_double_underscore(token.text, token.span));
343        }
344    }
345}
346
347fn parse_general<'a>(
348    handler: Handler,
349    source: &'a str,
350    start_pos: u32,
351    parse: impl FnOnce(
352        &mut Lexer<'a>,
353    ) -> Result<SyntaxNode<'a>, lalrpop_util::ParseError<usize, LalrToken<'a>, &'static str>>,
354) -> Result<SyntaxNode<'a>> {
355    let mut lexer = tokens::Lexer::new(source, start_pos, handler.clone());
356    match parse(&mut lexer) {
357        Ok(val) => {
358            handler.last_err()?;
359            Ok(val)
360        }
361        Err(e) => {
362            if matches!(e, lalrpop_util::ParseError::UnrecognizedEof { .. }) {
363                // We don't want to redundantly report the EOF error, when the meaningfull
364                // errors are recorded in the handler.
365                handler.last_err()?;
366            }
367            Err(convert(e, source, start_pos))
368        }
369    }
370}
371
372// We can't implement From<lalrpop_util::ParseError> since both that
373// trait and leo_errors::Error are defined in other crates.
374fn convert(
375    error: lalrpop_util::ParseError<usize, LalrToken<'_>, &'static str>,
376    source: &str,
377    start_pos: u32,
378) -> LeoError {
379    match error {
380        lalrpop_util::ParseError::UnrecognizedToken { token, expected } => {
381            let expected = expected.iter().flat_map(|s| tokens::Token::str_user(s)).format(", ");
382            ParserError::unexpected(token.1.text, expected, token.1.span).into()
383        }
384        lalrpop_util::ParseError::UnrecognizedEof { location, .. } => {
385            let (lo, hi) = if source.is_empty() {
386                (start_pos, start_pos)
387            } else if location >= source.len() + start_pos as usize {
388                // Generally lalrpop reports the `location` for this error as
389                // one character past the end of the source. So let's
390                // back up one character.
391                // Can't just subtract 1 as we may not be on a character boundary.
392                let lo = source.char_indices().last().unwrap().0 as u32 + start_pos;
393                (lo, lo + 1)
394            } else {
395                (location as u32, location as u32 + 1)
396            };
397            ParserError::unexpected_eof(Span { lo, hi }).into()
398        }
399        x => panic!("ERR: {x:?}"),
400    }
401}