somni_parser/
parser.rs

1//! Grammar parser.
2//!
3//! This module parses the following grammar (minus comments, which are ignored):
4//!
5//! ```text
6//! program -> item* EOF;
7//! item -> function | global | extern_fn; // TODO types.
8//!
9//! extern_fn -> 'extern' 'fn' identifier '(' ( function_argument ( ',' function_argument )* ','? )? ')' return_decl? ';' ;
10//! global -> 'var' identifier ':' type '=' static_initializer ';' ;
11//!
12//! static_initializer -> right_hand_expression ; // The language does not currently support function calls.
13//!
14//! function -> 'fn' identifier '(' ( function_argument ( ',' function_argument )* ','? )? ')' return_decl? body ;
15//! function_argument -> identifier ':' '&'? type ;
16//! return_decl -> '->' type ;
17//! type -> identifier ;
18//!
19//! body -> '{' statement? '}' ;
20//! statement -> 'var' identifier (':' type)? '=' right_hand_expression ';' (statement)?
21//!            | 'return' right_hand_expression? ';' (statement)?
22//!            | 'break' ';' (statement)?
23//!            | 'continue' ';' (statement)?
24//!            | 'if' right_hand_expression body ( 'else' body )? (statement)?
25//!            | 'loop' body (statement)?
26//!            | 'while' right_hand_expression body (statement)?
27//!            | body (statement)?
28//!            | expression ';' (statement)?
29//!            | right_hand_expression // implicit return statement
30//!
31//! expression -> (left_hand_expression '=')? right_hand_expression ;
32//!
33//! left_hand_expression -> ( '*' )? identifier ; // Should be a valid expression, too.
34//!
35//! right_hand_expression -> binary2 ( '||' binary2 )* ;
36//! binary2 -> binary3 ( '&&' binary3 )* ;
37//! binary3 -> binary4 ( ( '<' | '<=' | '>' | '>=' | '==' | '!=' ) binary4 )* ;
38//! binary4 -> binary5 ( '|' binary5 )* ;
39//! binary5 -> binary6 ( '^' binary6 )* ;
40//! binary6 -> binary7 ( '&' binary7 )* ;
41//! binary7 -> binary8 ( ( '<<' | '>>' ) binary8 )* ;
42//! binary8 -> binary9 ( ( '+' | '-' ) binary9 )* ;
43//! binary9 -> unary ( ( '*' | '/' | '%' ) unary )* ;
44//! unary -> ('!' | '-' | '&' | '*' )* primary | call ;
45//! primary -> ( literal | identifier ( '(' call_arguments ')' )? ) | '(' right_hand_expression ')' ;
46//! call_arguments -> right_hand_expression ( ',' right_hand_expression )* ','? ;
47//! literal -> NUMBER | STRING | 'true' | 'false' ;
48//! ```
49//!
50//! `NUMBER`: Non-negative integers (binary, decimal, hexadecimal) and floats.
51//! `STRING`: Double-quoted strings with escape sequences.
52
53use std::{
54    fmt::{Debug, Display},
55    num::{ParseFloatError, ParseIntError},
56    ops::ControlFlow,
57};
58
59use crate::{
60    ast::{
61        Body, Break, Continue, Else, EmptyReturn, Expression, ExternalFunction, Function,
62        FunctionArgument, GlobalVariable, If, Item, LeftHandExpression, Literal, LiteralValue,
63        Loop, Program, ReturnDecl, ReturnWithValue, RightHandExpression, Statement, TypeHint,
64        VariableDefinition,
65    },
66    lexer::{Token, TokenKind, Tokenizer},
67    parser::private::Sealed,
68    Error, Location,
69};
70
/// Private home of the sealing trait: because this module is not exported, traits
/// bounded on [`private::Sealed`] cannot be implemented outside of this crate.
mod private {
    /// Marker for the types that are allowed to implement the sealed parser traits.
    pub trait Sealed {}

    // Stamps out an empty `Sealed` impl for every listed type.
    macro_rules! seal {
        ($($ty:ty),* $(,)?) => {
            $(impl Sealed for $ty {})*
        };
    }

    seal!(u32, u64, u128, f32, f64);
}
80
81/// Parse literals into Somni integers.
82pub trait IntParser: Sized + Sealed {
83    fn parse(str: &str, radix: u32) -> Result<Self, ParseIntError>;
84}
85
86impl IntParser for u32 {
87    fn parse(str: &str, radix: u32) -> Result<Self, ParseIntError> {
88        u32::from_str_radix(str, radix)
89    }
90}
91impl IntParser for u64 {
92    fn parse(str: &str, radix: u32) -> Result<Self, ParseIntError> {
93        u64::from_str_radix(str, radix)
94    }
95}
96impl IntParser for u128 {
97    fn parse(str: &str, radix: u32) -> Result<Self, ParseIntError> {
98        u128::from_str_radix(str, radix)
99    }
100}
101
102/// Parse literals into Somni floats.
103pub trait FloatParser: Sized + Sealed {
104    fn parse(str: &str) -> Result<Self, ParseFloatError>;
105}
106
107impl FloatParser for f32 {
108    fn parse(str: &str) -> Result<Self, ParseFloatError> {
109        str.parse::<f32>()
110    }
111}
112impl FloatParser for f64 {
113    fn parse(str: &str) -> Result<Self, ParseFloatError> {
114        str.parse::<f64>()
115    }
116}
117
118/// Defines the numeric types used in the parser.
119pub trait TypeSet: Debug + Default {
120    type Integer: IntParser + Clone + Copy + PartialEq + Debug;
121    type Float: FloatParser + Clone + Copy + PartialEq + Debug;
122}
123
124/// Use 64-bit integers and 64-bit floats (default).
125#[derive(Debug, Default)]
126pub struct DefaultTypeSet;
127impl Sealed for DefaultTypeSet {}
128
129impl TypeSet for DefaultTypeSet {
130    type Integer = u64;
131    type Float = f64;
132}
133
134/// Use 32-bit integers and floats.
135#[derive(Debug, Default)]
136pub struct TypeSet32;
137impl Sealed for TypeSet32 {}
138impl TypeSet for TypeSet32 {
139    type Integer = u32;
140    type Float = f32;
141}
142
143/// Use 128-bit integers and 64-bit floats.
144#[derive(Debug, Default)]
145pub struct TypeSet128;
146impl Sealed for TypeSet128 {}
147impl TypeSet for TypeSet128 {
148    type Integer = u128;
149    type Float = f64;
150}
151
152impl<T> Program<T>
153where
154    T: TypeSet,
155{
156    fn parse(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Self, Error> {
157        let mut items = Vec::new();
158
159        while !stream.end()? {
160            items.push(Item::parse(stream)?);
161        }
162
163        Ok(Program { items })
164    }
165}
166
167impl<T> Item<T>
168where
169    T: TypeSet,
170{
171    fn parse(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Self, Error> {
172        if let Some(global_var) = GlobalVariable::try_parse(stream)? {
173            return Ok(Item::GlobalVariable(global_var));
174        }
175        if let Some(function) = ExternalFunction::try_parse(stream)? {
176            return Ok(Item::ExternFunction(function));
177        }
178        if let Some(function) = Function::try_parse(stream)? {
179            return Ok(Item::Function(function));
180        }
181
182        Err(stream.error("Expected global variable or function definition"))
183    }
184}
185
186impl<T> GlobalVariable<T>
187where
188    T: TypeSet,
189{
190    fn try_parse(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Option<Self>, Error> {
191        let Some(decl_token) = stream.take_match(TokenKind::Identifier, &["var"])? else {
192            return Ok(None);
193        };
194
195        let identifier = stream.expect_match(TokenKind::Identifier, &[])?;
196        let colon = stream.expect_match(TokenKind::Symbol, &[":"])?;
197        let type_token = TypeHint::parse(stream)?;
198        let equals_token = stream.expect_match(TokenKind::Symbol, &["="])?;
199        let initializer = Expression::Expression {
200            expression: RightHandExpression::parse(stream)?,
201        };
202        let semicolon = stream.expect_match(TokenKind::Symbol, &[";"])?;
203
204        Ok(Some(GlobalVariable {
205            decl_token,
206            identifier,
207            colon,
208            type_token,
209            equals_token,
210            initializer,
211            semicolon,
212        }))
213    }
214}
215
216impl ExternalFunction {
217    fn try_parse(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Option<Self>, Error> {
218        let Some(extern_fn_token) = stream.take_match(TokenKind::Identifier, &["extern"])? else {
219            return Ok(None);
220        };
221        let Some(fn_token) = stream.take_match(TokenKind::Identifier, &["fn"])? else {
222            return Ok(None);
223        };
224
225        let name = stream.expect_match(TokenKind::Identifier, &[])?;
226        let opening_paren = stream.expect_match(TokenKind::Symbol, &["("])?;
227
228        let mut arguments = Vec::new();
229        while let Some(arg_name) = stream.take_match(TokenKind::Identifier, &[])? {
230            let colon = stream.expect_match(TokenKind::Symbol, &[":"])?;
231            let reference_token = stream.take_match(TokenKind::Symbol, &["&"])?;
232            let type_token = TypeHint::parse(stream)?;
233
234            arguments.push(FunctionArgument {
235                name: arg_name,
236                colon,
237                reference_token,
238                arg_type: type_token,
239            });
240
241            if stream.take_match(TokenKind::Symbol, &[","])?.is_none() {
242                break;
243            }
244        }
245
246        let closing_paren = stream.expect_match(TokenKind::Symbol, &[")"])?;
247
248        let return_decl =
249            if let Some(return_token) = stream.take_match(TokenKind::Symbol, &["->"])? {
250                Some(ReturnDecl {
251                    return_token,
252                    return_type: TypeHint::parse(stream)?,
253                })
254            } else {
255                None
256            };
257
258        let semicolon = stream.expect_match(TokenKind::Symbol, &[";"])?;
259
260        Ok(Some(ExternalFunction {
261            extern_fn_token,
262            fn_token,
263            name,
264            opening_paren,
265            arguments,
266            closing_paren,
267            return_decl,
268            semicolon,
269        }))
270    }
271}
272
273impl<T> Function<T>
274where
275    T: TypeSet,
276{
277    fn try_parse(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Option<Self>, Error> {
278        let Some(fn_token) = stream.take_match(TokenKind::Identifier, &["fn"])? else {
279            return Ok(None);
280        };
281
282        let name = stream.expect_match(TokenKind::Identifier, &[])?;
283        let opening_paren = stream.expect_match(TokenKind::Symbol, &["("])?;
284
285        let mut arguments = Vec::new();
286        while let Some(arg_name) = stream.take_match(TokenKind::Identifier, &[])? {
287            let colon = stream.expect_match(TokenKind::Symbol, &[":"])?;
288            let reference_token = stream.take_match(TokenKind::Symbol, &["&"])?;
289            let type_token = TypeHint::parse(stream)?;
290
291            arguments.push(FunctionArgument {
292                name: arg_name,
293                colon,
294                reference_token,
295                arg_type: type_token,
296            });
297
298            if stream.take_match(TokenKind::Symbol, &[","])?.is_none() {
299                break;
300            }
301        }
302
303        let closing_paren = stream.expect_match(TokenKind::Symbol, &[")"])?;
304
305        let return_decl =
306            if let Some(return_token) = stream.take_match(TokenKind::Symbol, &["->"])? {
307                Some(ReturnDecl {
308                    return_token,
309                    return_type: TypeHint::parse(stream)?,
310                })
311            } else {
312                None
313            };
314
315        let body = Body::parse(stream)?;
316
317        Ok(Some(Function {
318            fn_token,
319            name,
320            opening_paren,
321            arguments,
322            closing_paren,
323            return_decl,
324            body,
325        }))
326    }
327}
328
329impl<T> Body<T>
330where
331    T: TypeSet,
332{
333    fn parse(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Self, Error> {
334        let opening_brace = stream.expect_match(TokenKind::Symbol, &["{"])?;
335
336        let mut body = Vec::new();
337        while Statement::<T>::matches(stream)? {
338            let (statement, stop) = match Statement::parse(stream)? {
339                ControlFlow::Continue(statement) => (statement, false),
340                ControlFlow::Break(statement) => (statement, true),
341            };
342            body.push(statement);
343            if stop {
344                break;
345            }
346        }
347
348        let closing_brace = stream.expect_match(TokenKind::Symbol, &["}"])?;
349
350        Ok(Body {
351            opening_brace,
352            statements: body,
353            closing_brace,
354        })
355    }
356}
357
358impl TypeHint {
359    fn parse(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Self, Error> {
360        let type_name = stream.expect_match(TokenKind::Identifier, &[])?;
361
362        Ok(TypeHint { type_name })
363    }
364}
365
366impl<T> Statement<T>
367where
368    T: TypeSet,
369{
370    fn matches(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<bool, Error> {
371        stream
372            .peek_match(TokenKind::Symbol, &["}"])
373            .map(|t| t.is_none())
374    }
375
376    fn parse(
377        stream: &mut TokenStream<'_, impl Tokenizer>,
378    ) -> Result<ControlFlow<Self, Self>, Error> {
379        if let Some(return_token) = stream.take_match(TokenKind::Identifier, &["return"])? {
380            let return_kind =
381                if let Some(semicolon) = stream.take_match(TokenKind::Symbol, &[";"])? {
382                    // Continue, parsing unreachable code is allowed
383                    Statement::EmptyReturn(EmptyReturn {
384                        return_token,
385                        semicolon,
386                    })
387                } else {
388                    let expr = RightHandExpression::parse(stream)?;
389                    let semicolon = stream.expect_match(TokenKind::Symbol, &[";"])?;
390                    Statement::Return(ReturnWithValue {
391                        return_token,
392                        expression: expr,
393                        semicolon,
394                    })
395                };
396
397            // Continue, parsing unreachable code is allowed
398            return Ok(ControlFlow::Continue(return_kind));
399        }
400
401        if let Some(decl_token) = stream.take_match(TokenKind::Identifier, &["var"])? {
402            let identifier = stream.expect_match(TokenKind::Identifier, &[])?;
403
404            let type_token = if stream.take_match(TokenKind::Symbol, &[":"])?.is_some() {
405                Some(TypeHint::parse(stream)?)
406            } else {
407                None
408            };
409
410            let equals_token = stream.expect_match(TokenKind::Symbol, &["="])?;
411            let expression = RightHandExpression::parse(stream)?;
412            let semicolon = stream.expect_match(TokenKind::Symbol, &[";"])?;
413
414            return Ok(ControlFlow::Continue(Statement::VariableDefinition(
415                VariableDefinition {
416                    decl_token,
417                    identifier,
418                    type_token,
419                    equals_token,
420                    initializer: expression,
421                    semicolon,
422                },
423            )));
424        }
425
426        if let Some(if_token) = stream.take_match(TokenKind::Identifier, &["if"])? {
427            let condition = RightHandExpression::parse(stream)?;
428            let body = Body::parse(stream)?;
429
430            let else_branch =
431                if let Some(else_token) = stream.take_match(TokenKind::Identifier, &["else"])? {
432                    let else_body = Body::parse(stream)?;
433
434                    Some(Else {
435                        else_token,
436                        else_body,
437                    })
438                } else {
439                    None
440                };
441
442            return Ok(ControlFlow::Continue(Statement::If(If {
443                if_token,
444                condition,
445                body,
446                else_branch,
447            })));
448        }
449
450        if let Some(loop_token) = stream.take_match(TokenKind::Identifier, &["loop"])? {
451            let body = Body::parse(stream)?;
452            return Ok(ControlFlow::Continue(Statement::Loop(Loop {
453                loop_token,
454                body,
455            })));
456        }
457
458        if let Some(while_token) = stream.take_match(TokenKind::Identifier, &["while"])? {
459            // Desugar while into loop { if condition { loop_body; } else { break; } }
460            let condition = RightHandExpression::parse(stream)?;
461            let body = Body::parse(stream)?;
462            return Ok(ControlFlow::Continue(Statement::Loop(Loop {
463                loop_token: while_token,
464                body: Body {
465                    opening_brace: body.opening_brace,
466                    closing_brace: body.closing_brace,
467                    statements: vec![Statement::If(If {
468                        if_token: while_token,
469                        condition: condition.clone(),
470                        body: body.clone(),
471                        else_branch: Some(Else {
472                            else_token: while_token,
473                            else_body: Body {
474                                opening_brace: body.opening_brace,
475                                closing_brace: body.closing_brace,
476                                statements: vec![Statement::Break(Break {
477                                    break_token: while_token,
478                                    semicolon: while_token,
479                                })],
480                            },
481                        }),
482                    })],
483                },
484            })));
485        }
486
487        if let Some(break_token) = stream.take_match(TokenKind::Identifier, &["break"])? {
488            let semicolon = stream.expect_match(TokenKind::Symbol, &[";"])?;
489            // Continue, unreachable code is allowed
490            return Ok(ControlFlow::Continue(Statement::Break(Break {
491                break_token,
492                semicolon,
493            })));
494        }
495        if let Some(continue_token) = stream.take_match(TokenKind::Identifier, &["continue"])? {
496            let semicolon = stream.expect_match(TokenKind::Symbol, &[";"])?;
497            // Continue, unreachable code is allowed
498            return Ok(ControlFlow::Continue(Statement::Continue(Continue {
499                continue_token,
500                semicolon,
501            })));
502        }
503
504        if let Ok(Some(_)) = stream.peek_match(TokenKind::Symbol, &["{"]) {
505            return Ok(ControlFlow::Continue(Statement::Scope(Body::parse(
506                stream,
507            )?)));
508        }
509
510        let save = stream.clone();
511        let expression = Expression::parse(stream)?;
512        match stream.take_match(TokenKind::Symbol, &[";"])? {
513            Some(semicolon) => Ok(ControlFlow::Continue(Statement::Expression {
514                expression,
515                semicolon,
516            })),
517            None => {
518                // No semicolon, re-parse as a right-hand expression
519                *stream = save;
520                let expression = RightHandExpression::parse(stream)?;
521
522                Ok(ControlFlow::Break(Statement::ImplicitReturn(expression)))
523            }
524        }
525    }
526}
527
528impl<T> Literal<T>
529where
530    T: TypeSet,
531{
532    fn parse(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Self, Error> {
533        let token = stream.peek_expect()?;
534
535        let token_source = stream.source(token.location);
536        let location = token.location;
537
538        let literal_value = match token.kind {
539            TokenKind::BinaryInteger => Self::parse_integer_literal(&token_source[2..], 2)
540                .map_err(|_| stream.error("Invalid binary integer literal"))?,
541            TokenKind::DecimalInteger => Self::parse_integer_literal(token_source, 10)
542                .map_err(|_| stream.error("Invalid integer literal"))?,
543            TokenKind::HexInteger => Self::parse_integer_literal(&token_source[2..], 16)
544                .map_err(|_| stream.error("Invalid hexadecimal integer literal"))?,
545            TokenKind::Float => <T::Float as FloatParser>::parse(token_source)
546                .map(LiteralValue::Float)
547                .map_err(|_| stream.error("Invalid float literal"))?,
548            TokenKind::String => match unescape(&token_source[1..token_source.len() - 1]) {
549                Ok(string) => LiteralValue::String(string),
550                Err(offset) => {
551                    return Err(Error {
552                        error: String::from("Invalid escape sequence in string literal")
553                            .into_boxed_str(),
554                        location: Location {
555                            start: token.location.start + offset,
556                            end: token.location.start + offset + 1,
557                        },
558                    });
559                }
560            },
561            TokenKind::Identifier if token_source == "true" => LiteralValue::Boolean(true),
562            TokenKind::Identifier if token_source == "false" => LiteralValue::Boolean(false),
563            _ => return Err(stream.error("Expected literal (number, string, or boolean)")),
564        };
565
566        stream.expect_match(token.kind, &[])?;
567        Ok(Self {
568            value: literal_value,
569            location,
570        })
571    }
572
573    fn parse_integer_literal(
574        token_source: &str,
575        radix: u32,
576    ) -> Result<LiteralValue<T>, ParseIntError> {
577        <T::Integer as IntParser>::parse(token_source, radix).map(LiteralValue::Integer)
578    }
579}
580
/// Resolves the backslash escape sequences in the contents of a string literal.
///
/// `s` is the text between the quotes. The supported sequences are `\n`, `\t`,
/// `\\`, `\"` and `\'`; every other character is copied verbatim.
///
/// # Errors
///
/// Returns a byte offset into `s`:
/// - for an unknown escape sequence, the offset of the character that follows the
///   backslash;
/// - for a backslash that is the last character of `s`, `s.len()` (the position
///   where the escaped character is missing).
fn unescape(s: &str) -> Result<String, usize> {
    // The result can never be longer than the input.
    let mut result = String::with_capacity(s.len());
    let mut chars = s.char_indices();

    while let Some((_, c)) = chars.next() {
        if c != '\\' {
            result.push(c);
            continue;
        }

        let resolved = match chars.next() {
            Some((_, 'n')) => '\n',
            Some((_, 't')) => '\t',
            Some((_, '\\')) => '\\',
            Some((_, '"')) => '"',
            Some((_, '\'')) => '\'',
            Some((offset, _)) => return Err(offset), // Invalid escape sequence
            None => return Err(s.len()),             // Dangling backslash
        };
        result.push(resolved);
    }

    Ok(result)
}
604
605impl LeftHandExpression {
606    fn parse<T: TypeSet>(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Self, Error> {
607        const UNARY_OPERATORS: &[&str] = &["*"];
608        let expr = if let Some(operator) = stream.take_match(TokenKind::Symbol, UNARY_OPERATORS)? {
609            Self::Deref {
610                operator,
611                name: Self::parse_name::<T>(stream)?,
612            }
613        } else {
614            Self::Name {
615                variable: Self::parse_name::<T>(stream)?,
616            }
617        };
618
619        Ok(expr)
620    }
621
622    fn parse_name<T: TypeSet>(
623        stream: &mut TokenStream<'_, impl Tokenizer>,
624    ) -> Result<Token, Error> {
625        let token = stream.peek_expect()?;
626        match token.kind {
627            TokenKind::Identifier => {
628                // true, false?
629                match Literal::<T>::parse(stream) {
630                    Ok(_) => Err(Error {
631                        error: "Parse error: Literals are not valid on the left-hand side"
632                            .to_string()
633                            .into_boxed_str(),
634                        location: token.location,
635                    }),
636                    _ => stream
637                        .take_match(TokenKind::Identifier, &[])
638                        .map(|v| v.unwrap()),
639                }
640            }
641            _ => Err(stream.error("Expected variable name or deref operator")),
642        }
643    }
644}
645
646impl<T> Expression<T>
647where
648    T: TypeSet,
649{
650    fn parse(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Self, Error> {
651        let save = stream.clone();
652
653        let expression = RightHandExpression::<T>::parse(stream)?;
654
655        if let Ok(Some(operator)) = stream.take_match(TokenKind::Symbol, &["="]) {
656            // Re-parse as an assignment
657            *stream = save;
658            let left_expr = LeftHandExpression::parse::<T>(stream)?;
659            stream.expect_match(TokenKind::Symbol, &["="])?;
660            let right_expr = RightHandExpression::parse(stream)?;
661
662            Ok(Self::Assignment {
663                left_expr,
664                operator,
665                right_expr,
666            })
667        } else {
668            Ok(Self::Expression { expression })
669        }
670    }
671}
672
673impl<T> RightHandExpression<T>
674where
675    T: TypeSet,
676{
677    fn parse(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Self, Error> {
678        // We define the binary operators from the lowest precedence to the highest.
679        // Each recursive call to `parse_binary` will handle one level of precedence, and pass
680        // the rest to the inner calls of `parse_binary`.
681        let operators: &[&[&str]] = &[
682            &["||"],
683            &["&&"],
684            &["<", "<=", ">", ">=", "==", "!="],
685            &["|"],
686            &["^"],
687            &["&"],
688            &["<<", ">>"],
689            &["+", "-"],
690            &["*", "/", "%"],
691        ];
692
693        Self::parse_binary(stream, operators)
694    }
695
696    fn parse_binary(
697        stream: &mut TokenStream<'_, impl Tokenizer>,
698        binary_operators: &[&[&str]],
699    ) -> Result<Self, Error> {
700        let Some((current, higher)) = binary_operators.split_first() else {
701            unreachable!("At least one operator set is expected");
702        };
703
704        let mut expr = if higher.is_empty() {
705            Self::parse_unary(stream)?
706        } else {
707            Self::parse_binary(stream, higher)?
708        };
709
710        while let Some(operator) = stream.take_match(TokenKind::Symbol, current)? {
711            let rhs = if higher.is_empty() {
712                Self::parse_unary(stream)?
713            } else {
714                Self::parse_binary(stream, higher)?
715            };
716
717            expr = Self::BinaryOperator {
718                name: operator,
719                operands: Box::new([expr, rhs]),
720            };
721        }
722
723        Ok(expr)
724    }
725
726    fn parse_unary(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Self, Error> {
727        const UNARY_OPERATORS: &[&str] = &["!", "-", "&", "*"];
728        if let Some(operator) = stream.take_match(TokenKind::Symbol, UNARY_OPERATORS)? {
729            let operand = Self::parse_unary(stream)?;
730            Ok(Self::UnaryOperator {
731                name: operator,
732                operand: Box::new(operand),
733            })
734        } else {
735            Self::parse_primary(stream)
736        }
737    }
738
739    fn parse_primary(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Self, Error> {
740        let token = stream.peek_expect()?;
741
742        match token.kind {
743            TokenKind::Identifier => {
744                // true, false?
745                if let Ok(literal) = Literal::<T>::parse(stream) {
746                    return Ok(Self::Literal { value: literal });
747                }
748
749                Self::parse_call(stream)
750            }
751            TokenKind::Symbol if stream.source(token.location) == "(" => {
752                stream.take_match(token.kind, &[])?;
753                let expr = Self::parse(stream)?;
754                stream.expect_match(TokenKind::Symbol, &[")"])?;
755                Ok(expr)
756            }
757            TokenKind::HexInteger
758            | TokenKind::DecimalInteger
759            | TokenKind::BinaryInteger
760            | TokenKind::Float
761            | TokenKind::String => Literal::<T>::parse(stream).map(|value| Self::Literal { value }),
762            _ => Err(stream.error("Expected variable, literal, or '('")),
763        }
764    }
765
766    fn parse_call(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Self, Error> {
767        let token = stream.expect_match(TokenKind::Identifier, &[])?;
768
769        if stream.take_match(TokenKind::Symbol, &["("])?.is_none() {
770            return Ok(Self::Variable { variable: token });
771        };
772
773        let mut arguments = Vec::new();
774        while stream.peek_match(TokenKind::Symbol, &[")"])?.is_none() {
775            let arg = Self::parse(stream)?;
776            arguments.push(arg);
777
778            if stream.take_match(TokenKind::Symbol, &[","])?.is_none() {
779                break;
780            }
781        }
782        stream.expect_match(TokenKind::Symbol, &[")"])?;
783
784        Ok(Self::FunctionCall {
785            name: token,
786            arguments: arguments.into_boxed_slice(),
787        })
788    }
789}
790
791#[derive(Clone)]
792struct TokenStream<'s, I>
793where
794    I: Tokenizer,
795{
796    source: &'s str,
797    tokens: I,
798}
799
800impl<'s, I> TokenStream<'s, I>
801where
802    I: Tokenizer,
803{
804    fn new(source: &'s str, tokens: I) -> Self {
805        TokenStream { source, tokens }
806    }
807
808    /// Fast-forwards the token iterator past comments.
809    fn skip_comments(&mut self) -> Result<(), Error> {
810        let mut peekable = self.tokens.clone();
811        while let Some(token) = peekable.next() {
812            if token?.kind == TokenKind::Comment {
813                self.tokens = peekable.clone();
814            } else {
815                break;
816            }
817        }
818
819        Ok(())
820    }
821
822    fn end(&mut self) -> Result<bool, Error> {
823        self.skip_comments()?;
824
825        Ok(self.tokens.clone().next().is_none())
826    }
827
828    fn peek(&mut self) -> Result<Option<Token>, Error> {
829        self.skip_comments()?;
830
831        match self.tokens.clone().next() {
832            Some(Ok(token)) => Ok(Some(token)),
833            Some(Err(error)) => Err(error),
834            None => Ok(None),
835        }
836    }
837
838    fn peek_expect(&mut self) -> Result<Token, Error> {
839        self.peek()?.ok_or_else(|| Error {
840            location: Location {
841                start: self.source.len(),
842                end: self.source.len(),
843            },
844            error: String::from("Unexpected end of input").into_boxed_str(),
845        })
846    }
847
848    fn peek_match(
849        &mut self,
850        token_kind: TokenKind,
851        source: &[&str],
852    ) -> Result<Option<Token>, Error> {
853        let Some(token) = self.peek()? else {
854            return Ok(None);
855        };
856
857        let peeked = if token.kind == token_kind
858            && (source.is_empty() || source.contains(&self.source(token.location)))
859        {
860            Some(token)
861        } else {
862            None
863        };
864
865        Ok(peeked)
866    }
867
868    fn take_match(
869        &mut self,
870        token_kind: TokenKind,
871        source: &[&str],
872    ) -> Result<Option<Token>, Error> {
873        self.peek_match(token_kind, source).map(|token| {
874            if let Some(token) = token {
875                self.tokens.next();
876                Some(token)
877            } else {
878                None
879            }
880        })
881    }
882
883    /// Takes the next token if it matches the expected kind and source.
884    /// Returns an error if the token does not match.
885    ///
886    /// If `source` is empty, it only checks the token kind.
887    /// If `source` is not empty, it checks if the token's source matches any of the provided strings.
888    fn expect_match(&mut self, token_kind: TokenKind, source: &[&str]) -> Result<Token, Error> {
889        if let Some(token) = self.take_match(token_kind, source)? {
890            Ok(token)
891        } else {
892            let token = self.peek()?;
893            let found = if let Some(token) = token {
894                if token.kind == token_kind {
895                    format!("found '{}'", self.source(token.location))
896                } else {
897                    format!("found {:?}", token.kind)
898                }
899            } else {
900                "reached end of input".to_string()
901            };
902            match source {
903                [] => Err(self.error(format_args!("Expected {token_kind:?}, {found}"))),
904                [s] => Err(self.error(format_args!("Expected '{s}', {found}"))),
905                _ => Err(self.error(format_args!("Expected one of {source:?}, {found}"))),
906            }
907        }
908    }
909
910    fn error(&self, message: impl Display) -> Error {
911        Error {
912            error: format!("Parse error: {message}").into_boxed_str(),
913            location: if let Some(Ok(token)) = self.tokens.clone().next() {
914                token.location
915            } else {
916                Location {
917                    start: self.source.len(),
918                    end: self.source.len(),
919                }
920            },
921        }
922    }
923
924    fn source(&self, location: Location) -> &'s str {
925        location.extract(self.source)
926    }
927}
928
929pub fn parse<T>(source: &str) -> Result<Program<T>, Error>
930where
931    T: TypeSet,
932{
933    let tokens = crate::lexer::tokenize(source);
934    let mut stream = TokenStream::new(source, tokens);
935
936    Program::<T>::parse(&mut stream)
937}
938
939pub fn parse_expression<T>(source: &str) -> Result<Expression<T>, Error>
940where
941    T: TypeSet,
942{
943    let tokens = crate::lexer::tokenize(source);
944    let mut stream = TokenStream::new(source, tokens);
945
946    Expression::<T>::parse(&mut stream)
947}
948
#[cfg(test)]
mod tests {
    use super::*;

    /// Known escapes are resolved; an unknown one reports the offset of the
    /// offending character.
    #[test]
    fn test_unescape() {
        let cases: [(&str, Result<String, usize>); 3] = [
            (r#"Hello\nWorld\t!"#, Ok(String::from("Hello\nWorld\t!"))),
            (r#"Hello\\World"#, Ok(String::from("Hello\\World"))),
            (r#"Hello\zWorld"#, Err(6)), // Invalid escape sequence
        ];

        for (input, expected) in cases {
            assert_eq!(unescape(input), expected);
        }
    }

    /// A literal that does not fit the 32-bit integer type is a parse error.
    #[test]
    fn test_out_of_range_literal() {
        let error =
            parse_expression::<TypeSet32>("0x100000000").expect_err("Parsing should fail");

        assert_eq!(
            "Parse error: Invalid hexadecimal integer literal",
            error.error.as_ref()
        );
    }
}