somni_parser/
parser.rs

1//! Grammar parser.
2//!
3//! This module parses the following grammar (minus comments, which are ignored):
4//!
5//! ```text
6//! program -> item* EOF;
7//! item -> function | global | extern_fn; // TODO types.
8//!
//! extern_fn -> 'extern' 'fn' identifier '(' function_argument ( ',' function_argument )* ','? ')' return_decl? ';' ;
10//! global -> 'var' identifier ':' type '=' static_initializer ';' ;
11//!
12//! static_initializer -> right_hand_expression ; // The language does not currently support function calls.
13//!
14//! function -> 'fn' identifier '(' function_argument ( ',' function_argument )* ','? ')' return_decl? body ;
15//! function_argument -> identifier ':' '&'? type ;
16//! return_decl -> '->' type ;
17//! type -> identifier ;
18//!
19//! body -> '{' statement '}' ;
20//! statement -> 'var' identifier (':' type)? '=' right_hand_expression ';' (statement)?
21//!            | 'return' right_hand_expression? ';' (statement)?
22//!            | 'break' ';' (statement)?
23//!            | 'continue' ';' (statement)?
24//!            | 'if' right_hand_expression body ( 'else' body )? (statement)?
25//!            | 'loop' body (statement)?
26//!            | 'while' right_hand_expression body (statement)?
27//!            | body (statement)?
28//!            | expression ';' (statement)?
//!            | right_hand_expression // implicit return statement
30//!
31//! expression -> (left_hand_expression '=')? right_hand_expression ;
32//!
33//! left_hand_expression -> ( '*' )? identifier ; // Should be a valid expression, too.
34//!
35//! right_hand_expression -> binary2 ( '||' binary2 )* ;
36//! binary2 -> binary3 ( '&&' binary3 )* ;
37//! binary3 -> binary4 ( ( '<' | '<=' | '>' | '>=' | '==' | '!=' ) binary4 )* ;
38//! binary4 -> binary5 ( '|' binary5 )* ;
39//! binary5 -> binary6 ( '^' binary6 )* ;
40//! binary6 -> binary7 ( '&' binary7 )* ;
41//! binary7 -> binary8 ( ( '<<' | '>>' ) binary8 )* ;
42//! binary8 -> binary9 ( ( '+' | '-' ) binary9 )* ;
//! binary9 -> unary ( ( '*' | '/' | '%' ) unary )* ;
44//! unary -> ('!' | '-' | '&' | '*' )* primary | call ;
45//! primary -> ( literal | identifier ( '(' call_arguments ')' )? ) | '(' right_hand_expression ')' ;
46//! call_arguments -> right_hand_expression ( ',' right_hand_expression )* ','? ;
47//! literal -> NUMBER | STRING | 'true' | 'false' ;
48//! ```
49//!
50//! `NUMBER`: Non-negative integers (binary, decimal, hexadecimal) and floats.
51//! `STRING`: Double-quoted strings with escape sequences.
52
53use std::{
54    fmt::{Debug, Display},
55    num::{ParseFloatError, ParseIntError},
56    ops::ControlFlow,
57};
58
59use crate::{
60    ast::{
61        Body, Break, Continue, Else, EmptyReturn, Expression, ExternalFunction, Function,
62        FunctionArgument, GlobalVariable, If, Item, LeftHandExpression, Literal, LiteralValue,
63        Loop, Program, ReturnDecl, ReturnWithValue, RightHandExpression, Statement, TypeHint,
64        VariableDefinition,
65    },
66    lexer::{Token, TokenKind, Tokenizer},
67    parser::private::Sealed,
68    Error, Location,
69};
70
mod private {
    /// Marker trait living in a private module.
    ///
    /// It is used as a supertrait bound by the literal-parsing traits of this
    /// file, and is implemented here for the primitive numeric types they
    /// support.
    pub trait Sealed {}

    /// Emits an empty `Sealed` impl for every listed type.
    macro_rules! seal {
        ($($ty:ty),* $(,)?) => {
            $(impl Sealed for $ty {})*
        };
    }

    seal!(u32, u64, u128, f32, f64);
}
80
81/// Parse literals into Somni integers.
82pub trait IntParser: Sized + Sealed {
83    fn parse(str: &str, radix: u32) -> Result<Self, ParseIntError>;
84}
85
86impl IntParser for u32 {
87    fn parse(str: &str, radix: u32) -> Result<Self, ParseIntError> {
88        u32::from_str_radix(str, radix)
89    }
90}
91impl IntParser for u64 {
92    fn parse(str: &str, radix: u32) -> Result<Self, ParseIntError> {
93        u64::from_str_radix(str, radix)
94    }
95}
96impl IntParser for u128 {
97    fn parse(str: &str, radix: u32) -> Result<Self, ParseIntError> {
98        u128::from_str_radix(str, radix)
99    }
100}
101
102/// Parse literals into Somni floats.
103pub trait FloatParser: Sized + Sealed {
104    fn parse(str: &str) -> Result<Self, ParseFloatError>;
105}
106
107impl FloatParser for f32 {
108    fn parse(str: &str) -> Result<Self, ParseFloatError> {
109        str.parse::<f32>()
110    }
111}
112impl FloatParser for f64 {
113    fn parse(str: &str) -> Result<Self, ParseFloatError> {
114        str.parse::<f64>()
115    }
116}
117
118/// Defines the numeric types used in the parser.
119pub trait TypeSet: Debug + Default {
120    type Integer: IntParser + Clone + Copy + PartialEq + Debug;
121    type Float: FloatParser + Clone + Copy + PartialEq + Debug;
122}
123
124/// Use 64-bit integers and 64-bit floats (default).
125#[derive(Debug, Default)]
126pub struct DefaultTypeSet;
127impl Sealed for DefaultTypeSet {}
128
129impl TypeSet for DefaultTypeSet {
130    type Integer = u64;
131    type Float = f64;
132}
133
134/// Use 32-bit integers and floats.
135#[derive(Debug, Default)]
136pub struct TypeSet32;
137impl Sealed for TypeSet32 {}
138impl TypeSet for TypeSet32 {
139    type Integer = u32;
140    type Float = f32;
141}
142
143/// Use 128-bit integers and 64-bit floats.
144#[derive(Debug, Default)]
145pub struct TypeSet128;
146impl Sealed for TypeSet128 {}
147impl TypeSet for TypeSet128 {
148    type Integer = u128;
149    type Float = f64;
150}
151
152impl<T> Program<T>
153where
154    T: TypeSet,
155{
156    fn parse(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Self, Error> {
157        let mut items = Vec::new();
158
159        while !stream.end()? {
160            items.push(Item::parse(stream)?);
161        }
162
163        Ok(Program { items })
164    }
165}
166
167impl<T> Item<T>
168where
169    T: TypeSet,
170{
171    fn parse(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Self, Error> {
172        if let Some(global_var) = GlobalVariable::try_parse(stream)? {
173            return Ok(Item::GlobalVariable(global_var));
174        }
175        if let Some(function) = ExternalFunction::try_parse(stream)? {
176            return Ok(Item::ExternFunction(function));
177        }
178        if let Some(function) = Function::try_parse(stream)? {
179            return Ok(Item::Function(function));
180        }
181
182        Err(stream.error("Expected global variable or function definition"))
183    }
184}
185
186impl<T> GlobalVariable<T>
187where
188    T: TypeSet,
189{
190    fn try_parse(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Option<Self>, Error> {
191        let Some(decl_token) = stream.take_match(TokenKind::Identifier, &["var"])? else {
192            return Ok(None);
193        };
194
195        let identifier = stream.expect_match(TokenKind::Identifier, &[])?;
196        let colon = stream.expect_match(TokenKind::Symbol, &[":"])?;
197        let type_token = TypeHint::parse(stream)?;
198        let equals_token = stream.expect_match(TokenKind::Symbol, &["="])?;
199        let initializer = Expression::Expression {
200            expression: RightHandExpression::parse(stream)?,
201        };
202        let semicolon = stream.expect_match(TokenKind::Symbol, &[";"])?;
203
204        Ok(Some(GlobalVariable {
205            decl_token,
206            identifier,
207            colon,
208            type_token,
209            equals_token,
210            initializer,
211            semicolon,
212        }))
213    }
214}
215
216impl ExternalFunction {
217    fn try_parse(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Option<Self>, Error> {
218        let Some(extern_fn_token) = stream.take_match(TokenKind::Identifier, &["extern"])? else {
219            return Ok(None);
220        };
221        let Some(fn_token) = stream.take_match(TokenKind::Identifier, &["fn"])? else {
222            return Ok(None);
223        };
224
225        let name = stream.expect_match(TokenKind::Identifier, &[])?;
226        let opening_paren = stream.expect_match(TokenKind::Symbol, &["("])?;
227
228        let mut arguments = Vec::new();
229        while let Some(arg_name) = stream.take_match(TokenKind::Identifier, &[])? {
230            let colon = stream.expect_match(TokenKind::Symbol, &[":"])?;
231            let reference_token = stream.take_match(TokenKind::Symbol, &["&"])?;
232            let type_token = TypeHint::parse(stream)?;
233
234            arguments.push(FunctionArgument {
235                name: arg_name,
236                colon,
237                reference_token,
238                arg_type: type_token,
239            });
240
241            if stream.take_match(TokenKind::Symbol, &[","])?.is_none() {
242                break;
243            }
244        }
245
246        let closing_paren = stream.expect_match(TokenKind::Symbol, &[")"])?;
247
248        let return_decl =
249            if let Some(return_token) = stream.take_match(TokenKind::Symbol, &["->"])? {
250                Some(ReturnDecl {
251                    return_token,
252                    return_type: TypeHint::parse(stream)?,
253                })
254            } else {
255                None
256            };
257
258        let semicolon = stream.expect_match(TokenKind::Symbol, &[";"])?;
259
260        Ok(Some(ExternalFunction {
261            extern_fn_token,
262            fn_token,
263            name,
264            opening_paren,
265            arguments,
266            closing_paren,
267            return_decl,
268            semicolon,
269        }))
270    }
271}
272
273impl<T> Function<T>
274where
275    T: TypeSet,
276{
277    fn try_parse(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Option<Self>, Error> {
278        let Some(fn_token) = stream.take_match(TokenKind::Identifier, &["fn"])? else {
279            return Ok(None);
280        };
281
282        let name = stream.expect_match(TokenKind::Identifier, &[])?;
283        let opening_paren = stream.expect_match(TokenKind::Symbol, &["("])?;
284
285        let mut arguments = Vec::new();
286        while let Some(arg_name) = stream.take_match(TokenKind::Identifier, &[])? {
287            let colon = stream.expect_match(TokenKind::Symbol, &[":"])?;
288            let reference_token = stream.take_match(TokenKind::Symbol, &["&"])?;
289            let type_token = TypeHint::parse(stream)?;
290
291            arguments.push(FunctionArgument {
292                name: arg_name,
293                colon,
294                reference_token,
295                arg_type: type_token,
296            });
297
298            if stream.take_match(TokenKind::Symbol, &[","])?.is_none() {
299                break;
300            }
301        }
302
303        let closing_paren = stream.expect_match(TokenKind::Symbol, &[")"])?;
304
305        let return_decl =
306            if let Some(return_token) = stream.take_match(TokenKind::Symbol, &["->"])? {
307                Some(ReturnDecl {
308                    return_token,
309                    return_type: TypeHint::parse(stream)?,
310                })
311            } else {
312                None
313            };
314
315        let body = Body::parse(stream)?;
316
317        Ok(Some(Function {
318            fn_token,
319            name,
320            opening_paren,
321            arguments,
322            closing_paren,
323            return_decl,
324            body,
325        }))
326    }
327}
328
329impl<T> Body<T>
330where
331    T: TypeSet,
332{
333    fn parse(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Self, Error> {
334        let opening_brace = stream.expect_match(TokenKind::Symbol, &["{"])?;
335
336        let mut body = Vec::new();
337        while Statement::<T>::matches(stream)? {
338            let (statement, stop) = match Statement::parse(stream)? {
339                ControlFlow::Continue(statement) => (statement, false),
340                ControlFlow::Break(statement) => (statement, true),
341            };
342            body.push(statement);
343            if stop {
344                break;
345            }
346        }
347
348        let closing_brace = stream.expect_match(TokenKind::Symbol, &["}"])?;
349
350        Ok(Body {
351            opening_brace,
352            statements: body,
353            closing_brace,
354        })
355    }
356}
357
358impl TypeHint {
359    fn parse(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Self, Error> {
360        let type_name = stream.expect_match(TokenKind::Identifier, &[])?;
361
362        Ok(TypeHint { type_name })
363    }
364}
365
366impl<T> Statement<T>
367where
368    T: TypeSet,
369{
370    fn matches(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<bool, Error> {
371        stream
372            .peek_match(TokenKind::Symbol, &["}"])
373            .map(|t| t.is_none())
374    }
375
376    fn parse(
377        stream: &mut TokenStream<'_, impl Tokenizer>,
378    ) -> Result<ControlFlow<Self, Self>, Error> {
379        if let Some(return_token) = stream.take_match(TokenKind::Identifier, &["return"])? {
380            let return_kind =
381                if let Some(semicolon) = stream.take_match(TokenKind::Symbol, &[";"])? {
382                    // Continue, parsing unreachable code is allowed
383                    Statement::EmptyReturn(EmptyReturn {
384                        return_token,
385                        semicolon,
386                    })
387                } else {
388                    let expr = RightHandExpression::parse(stream)?;
389                    let semicolon = stream.expect_match(TokenKind::Symbol, &[";"])?;
390                    Statement::Return(ReturnWithValue {
391                        return_token,
392                        expression: expr,
393                        semicolon,
394                    })
395                };
396
397            // Continue, parsing unreachable code is allowed
398            return Ok(ControlFlow::Continue(return_kind));
399        }
400
401        if let Some(decl_token) = stream.take_match(TokenKind::Identifier, &["var"])? {
402            let identifier = stream.expect_match(TokenKind::Identifier, &[])?;
403
404            let type_token = if stream.take_match(TokenKind::Symbol, &[":"])?.is_some() {
405                Some(TypeHint::parse(stream)?)
406            } else {
407                None
408            };
409
410            let equals_token = stream.expect_match(TokenKind::Symbol, &["="])?;
411            let expression = RightHandExpression::parse(stream)?;
412            let semicolon = stream.expect_match(TokenKind::Symbol, &[";"])?;
413
414            return Ok(ControlFlow::Continue(Statement::VariableDefinition(
415                VariableDefinition {
416                    decl_token,
417                    identifier,
418                    type_token,
419                    equals_token,
420                    initializer: expression,
421                    semicolon,
422                },
423            )));
424        }
425
426        if let Some(if_token) = stream.take_match(TokenKind::Identifier, &["if"])? {
427            let condition = RightHandExpression::parse(stream)?;
428            let body = Body::parse(stream)?;
429
430            let else_branch =
431                if let Some(else_token) = stream.take_match(TokenKind::Identifier, &["else"])? {
432                    let else_body = Body::parse(stream)?;
433
434                    Some(Else {
435                        else_token,
436                        else_body,
437                    })
438                } else {
439                    None
440                };
441
442            return Ok(ControlFlow::Continue(Statement::If(If {
443                if_token,
444                condition,
445                body,
446                else_branch,
447            })));
448        }
449
450        if let Some(loop_token) = stream.take_match(TokenKind::Identifier, &["loop"])? {
451            let body = Body::parse(stream)?;
452            return Ok(ControlFlow::Continue(Statement::Loop(Loop {
453                loop_token,
454                body,
455            })));
456        }
457
458        if let Some(while_token) = stream.take_match(TokenKind::Identifier, &["while"])? {
459            // Desugar while into loop { if condition { loop_body; } else { break; } }
460            let condition = RightHandExpression::parse(stream)?;
461            let body = Body::parse(stream)?;
462            return Ok(ControlFlow::Continue(Statement::Loop(Loop {
463                loop_token: while_token,
464                body: Body {
465                    opening_brace: body.opening_brace,
466                    closing_brace: body.closing_brace,
467                    statements: vec![Statement::If(If {
468                        if_token: while_token,
469                        condition: condition.clone(),
470                        body: body.clone(),
471                        else_branch: Some(Else {
472                            else_token: while_token,
473                            else_body: Body {
474                                opening_brace: body.opening_brace,
475                                closing_brace: body.closing_brace,
476                                statements: vec![Statement::Break(Break {
477                                    break_token: while_token,
478                                    semicolon: while_token,
479                                })],
480                            },
481                        }),
482                    })],
483                },
484            })));
485        }
486
487        if let Some(break_token) = stream.take_match(TokenKind::Identifier, &["break"])? {
488            let semicolon = stream.expect_match(TokenKind::Symbol, &[";"])?;
489            // Continue, unreachable code is allowed
490            return Ok(ControlFlow::Continue(Statement::Break(Break {
491                break_token,
492                semicolon,
493            })));
494        }
495        if let Some(continue_token) = stream.take_match(TokenKind::Identifier, &["continue"])? {
496            let semicolon = stream.expect_match(TokenKind::Symbol, &[";"])?;
497            // Continue, unreachable code is allowed
498            return Ok(ControlFlow::Continue(Statement::Continue(Continue {
499                continue_token,
500                semicolon,
501            })));
502        }
503
504        if let Ok(Some(_)) = stream.peek_match(TokenKind::Symbol, &["{"]) {
505            return Ok(ControlFlow::Continue(Statement::Scope(Body::parse(
506                stream,
507            )?)));
508        }
509
510        let save = stream.clone();
511        let expression = Expression::parse(stream)?;
512        match stream.take_match(TokenKind::Symbol, &[";"])? {
513            Some(semicolon) => Ok(ControlFlow::Continue(Statement::Expression {
514                expression,
515                semicolon,
516            })),
517            None => {
518                // No semicolon, re-parse as a right-hand expression
519                *stream = save;
520                let expression = RightHandExpression::parse(stream)?;
521
522                Ok(ControlFlow::Break(Statement::ImplicitReturn(expression)))
523            }
524        }
525    }
526}
527
528impl<T> Literal<T>
529where
530    T: TypeSet,
531{
532    fn parse(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Self, Error> {
533        let token = stream.peek_expect()?;
534
535        let token_source = stream.source(token.location);
536        let location = token.location;
537
538        let literal_value = match token.kind {
539            TokenKind::BinaryInteger => {
540                let token_source = token_source.replace("_", "");
541                Self::parse_integer_literal(&token_source[2..], 2)
542                    .map_err(|_| stream.error("Invalid binary integer literal"))?
543            }
544            TokenKind::DecimalInteger => {
545                let token_source = token_source.replace("_", "");
546                Self::parse_integer_literal(&token_source, 10)
547                    .map_err(|_| stream.error("Invalid integer literal"))?
548            }
549            TokenKind::HexInteger => {
550                let token_source = token_source.replace("_", "");
551                Self::parse_integer_literal(&token_source[2..], 16)
552                    .map_err(|_| stream.error("Invalid hexadecimal integer literal"))?
553            }
554            TokenKind::Float => {
555                let token_source = token_source.replace("_", "");
556                <T::Float as FloatParser>::parse(&token_source)
557                    .map(LiteralValue::Float)
558                    .map_err(|_| stream.error("Invalid float literal"))?
559            }
560            TokenKind::String => match unescape(&token_source[1..token_source.len() - 1]) {
561                Ok(string) => LiteralValue::String(string),
562                Err(offset) => {
563                    return Err(Error {
564                        error: String::from("Invalid escape sequence in string literal")
565                            .into_boxed_str(),
566                        location: Location {
567                            start: token.location.start + offset,
568                            end: token.location.start + offset + 1,
569                        },
570                    });
571                }
572            },
573            TokenKind::Identifier if token_source == "true" => LiteralValue::Boolean(true),
574            TokenKind::Identifier if token_source == "false" => LiteralValue::Boolean(false),
575            _ => return Err(stream.error("Expected literal (number, string, or boolean)")),
576        };
577
578        stream.expect_match(token.kind, &[])?;
579        Ok(Self {
580            value: literal_value,
581            location,
582        })
583    }
584
585    fn parse_integer_literal(
586        token_source: &str,
587        radix: u32,
588    ) -> Result<LiteralValue<T>, ParseIntError> {
589        <T::Integer as IntParser>::parse(token_source, radix).map(LiteralValue::Integer)
590    }
591}
592
/// Resolves backslash escape sequences in `s`.
///
/// Supported escapes are `\n`, `\t`, `\\`, `\"` and `\'`; every other
/// character is copied through unchanged.
///
/// # Errors
///
/// Returns the byte offset within `s` of the character that follows the
/// backslash when the escape is not one of the supported ones. A backslash at
/// the very end of `s` has nothing to escape and is reported with the offset
/// `s.len()`.
fn unescape(s: &str) -> Result<String, usize> {
    // The output is never longer than the input.
    let mut result = String::with_capacity(s.len());
    let mut chars = s.char_indices();

    while let Some((_, c)) = chars.next() {
        if c != '\\' {
            result.push(c);
            continue;
        }

        // A backslash must be followed by one of the known escape characters.
        match chars.next() {
            Some((_, 'n')) => result.push('\n'),
            Some((_, 't')) => result.push('\t'),
            Some((_, '\\')) => result.push('\\'),
            Some((_, '"')) => result.push('"'),
            Some((_, '\'')) => result.push('\''),
            Some((i, _)) => return Err(i), // Invalid escape sequence
            None => return Err(s.len()),   // Dangling backslash at end of input
        }
    }

    Ok(result)
}
616
617impl LeftHandExpression {
618    fn parse<T: TypeSet>(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Self, Error> {
619        const UNARY_OPERATORS: &[&str] = &["*"];
620        let expr = if let Some(operator) = stream.take_match(TokenKind::Symbol, UNARY_OPERATORS)? {
621            Self::Deref {
622                operator,
623                name: Self::parse_name::<T>(stream)?,
624            }
625        } else {
626            Self::Name {
627                variable: Self::parse_name::<T>(stream)?,
628            }
629        };
630
631        Ok(expr)
632    }
633
634    fn parse_name<T: TypeSet>(
635        stream: &mut TokenStream<'_, impl Tokenizer>,
636    ) -> Result<Token, Error> {
637        let token = stream.peek_expect()?;
638        match token.kind {
639            TokenKind::Identifier => {
640                // true, false?
641                match Literal::<T>::parse(stream) {
642                    Ok(_) => Err(Error {
643                        error: "Parse error: Literals are not valid on the left-hand side"
644                            .to_string()
645                            .into_boxed_str(),
646                        location: token.location,
647                    }),
648                    _ => stream
649                        .take_match(TokenKind::Identifier, &[])
650                        .map(|v| v.unwrap()),
651                }
652            }
653            _ => Err(stream.error("Expected variable name or deref operator")),
654        }
655    }
656}
657
658impl<T> Expression<T>
659where
660    T: TypeSet,
661{
662    fn parse(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Self, Error> {
663        let save = stream.clone();
664
665        let expression = RightHandExpression::<T>::parse(stream)?;
666
667        if let Ok(Some(operator)) = stream.take_match(TokenKind::Symbol, &["="]) {
668            // Re-parse as an assignment
669            *stream = save;
670            let left_expr = LeftHandExpression::parse::<T>(stream)?;
671            stream.expect_match(TokenKind::Symbol, &["="])?;
672            let right_expr = RightHandExpression::parse(stream)?;
673
674            Ok(Self::Assignment {
675                left_expr,
676                operator,
677                right_expr,
678            })
679        } else {
680            Ok(Self::Expression { expression })
681        }
682    }
683}
684
685impl<T> RightHandExpression<T>
686where
687    T: TypeSet,
688{
689    fn parse(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Self, Error> {
690        // We define the binary operators from the lowest precedence to the highest.
691        // Each recursive call to `parse_binary` will handle one level of precedence, and pass
692        // the rest to the inner calls of `parse_binary`.
693        let operators: &[&[&str]] = &[
694            &["||"],
695            &["&&"],
696            &["<", "<=", ">", ">=", "==", "!="],
697            &["|"],
698            &["^"],
699            &["&"],
700            &["<<", ">>"],
701            &["+", "-"],
702            &["*", "/", "%"],
703        ];
704
705        Self::parse_binary(stream, operators)
706    }
707
708    fn parse_binary(
709        stream: &mut TokenStream<'_, impl Tokenizer>,
710        binary_operators: &[&[&str]],
711    ) -> Result<Self, Error> {
712        let Some((current, higher)) = binary_operators.split_first() else {
713            unreachable!("At least one operator set is expected");
714        };
715
716        let mut expr = if higher.is_empty() {
717            Self::parse_unary(stream)?
718        } else {
719            Self::parse_binary(stream, higher)?
720        };
721
722        while let Some(operator) = stream.take_match(TokenKind::Symbol, current)? {
723            let rhs = if higher.is_empty() {
724                Self::parse_unary(stream)?
725            } else {
726                Self::parse_binary(stream, higher)?
727            };
728
729            expr = Self::BinaryOperator {
730                name: operator,
731                operands: Box::new([expr, rhs]),
732            };
733        }
734
735        Ok(expr)
736    }
737
738    fn parse_unary(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Self, Error> {
739        const UNARY_OPERATORS: &[&str] = &["!", "-", "&", "*"];
740        if let Some(operator) = stream.take_match(TokenKind::Symbol, UNARY_OPERATORS)? {
741            let operand = Self::parse_unary(stream)?;
742            Ok(Self::UnaryOperator {
743                name: operator,
744                operand: Box::new(operand),
745            })
746        } else {
747            Self::parse_primary(stream)
748        }
749    }
750
751    fn parse_primary(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Self, Error> {
752        let token = stream.peek_expect()?;
753
754        match token.kind {
755            TokenKind::Identifier => {
756                // true, false?
757                if let Ok(literal) = Literal::<T>::parse(stream) {
758                    return Ok(Self::Literal { value: literal });
759                }
760
761                Self::parse_call(stream)
762            }
763            TokenKind::Symbol if stream.source(token.location) == "(" => {
764                stream.take_match(token.kind, &[])?;
765                let expr = Self::parse(stream)?;
766                stream.expect_match(TokenKind::Symbol, &[")"])?;
767                Ok(expr)
768            }
769            TokenKind::HexInteger
770            | TokenKind::DecimalInteger
771            | TokenKind::BinaryInteger
772            | TokenKind::Float
773            | TokenKind::String => Literal::<T>::parse(stream).map(|value| Self::Literal { value }),
774            _ => Err(stream.error("Expected variable, literal, or '('")),
775        }
776    }
777
778    fn parse_call(stream: &mut TokenStream<'_, impl Tokenizer>) -> Result<Self, Error> {
779        let token = stream.expect_match(TokenKind::Identifier, &[])?;
780
781        if stream.take_match(TokenKind::Symbol, &["("])?.is_none() {
782            return Ok(Self::Variable { variable: token });
783        };
784
785        let mut arguments = Vec::new();
786        while stream.peek_match(TokenKind::Symbol, &[")"])?.is_none() {
787            let arg = Self::parse(stream)?;
788            arguments.push(arg);
789
790            if stream.take_match(TokenKind::Symbol, &[","])?.is_none() {
791                break;
792            }
793        }
794        stream.expect_match(TokenKind::Symbol, &[")"])?;
795
796        Ok(Self::FunctionCall {
797            name: token,
798            arguments: arguments.into_boxed_slice(),
799        })
800    }
801}
802
803#[derive(Clone)]
804struct TokenStream<'s, I>
805where
806    I: Tokenizer,
807{
808    source: &'s str,
809    tokens: I,
810}
811
812impl<'s, I> TokenStream<'s, I>
813where
814    I: Tokenizer,
815{
816    fn new(source: &'s str, tokens: I) -> Self {
817        TokenStream { source, tokens }
818    }
819
820    /// Fast-forwards the token iterator past comments.
821    fn skip_comments(&mut self) -> Result<(), Error> {
822        let mut peekable = self.tokens.clone();
823        while let Some(token) = peekable.next() {
824            if token?.kind == TokenKind::Comment {
825                self.tokens = peekable.clone();
826            } else {
827                break;
828            }
829        }
830
831        Ok(())
832    }
833
834    fn end(&mut self) -> Result<bool, Error> {
835        self.skip_comments()?;
836
837        Ok(self.tokens.clone().next().is_none())
838    }
839
840    fn peek(&mut self) -> Result<Option<Token>, Error> {
841        self.skip_comments()?;
842
843        match self.tokens.clone().next() {
844            Some(Ok(token)) => Ok(Some(token)),
845            Some(Err(error)) => Err(error),
846            None => Ok(None),
847        }
848    }
849
850    fn peek_expect(&mut self) -> Result<Token, Error> {
851        self.peek()?.ok_or_else(|| Error {
852            location: Location {
853                start: self.source.len(),
854                end: self.source.len(),
855            },
856            error: String::from("Unexpected end of input").into_boxed_str(),
857        })
858    }
859
860    fn peek_match(
861        &mut self,
862        token_kind: TokenKind,
863        source: &[&str],
864    ) -> Result<Option<Token>, Error> {
865        let Some(token) = self.peek()? else {
866            return Ok(None);
867        };
868
869        let peeked = if token.kind == token_kind
870            && (source.is_empty() || source.contains(&self.source(token.location)))
871        {
872            Some(token)
873        } else {
874            None
875        };
876
877        Ok(peeked)
878    }
879
880    fn take_match(
881        &mut self,
882        token_kind: TokenKind,
883        source: &[&str],
884    ) -> Result<Option<Token>, Error> {
885        self.peek_match(token_kind, source).map(|token| {
886            if let Some(token) = token {
887                self.tokens.next();
888                Some(token)
889            } else {
890                None
891            }
892        })
893    }
894
895    /// Takes the next token if it matches the expected kind and source.
896    /// Returns an error if the token does not match.
897    ///
898    /// If `source` is empty, it only checks the token kind.
899    /// If `source` is not empty, it checks if the token's source matches any of the provided strings.
900    fn expect_match(&mut self, token_kind: TokenKind, source: &[&str]) -> Result<Token, Error> {
901        if let Some(token) = self.take_match(token_kind, source)? {
902            Ok(token)
903        } else {
904            let token = self.peek()?;
905            let found = if let Some(token) = token {
906                if token.kind == token_kind {
907                    format!("found '{}'", self.source(token.location))
908                } else {
909                    format!("found {:?}", token.kind)
910                }
911            } else {
912                "reached end of input".to_string()
913            };
914            match source {
915                [] => Err(self.error(format_args!("Expected {token_kind:?}, {found}"))),
916                [s] => Err(self.error(format_args!("Expected '{s}', {found}"))),
917                _ => Err(self.error(format_args!("Expected one of {source:?}, {found}"))),
918            }
919        }
920    }
921
922    fn error(&self, message: impl Display) -> Error {
923        Error {
924            error: format!("Parse error: {message}").into_boxed_str(),
925            location: if let Some(Ok(token)) = self.tokens.clone().next() {
926                token.location
927            } else {
928                Location {
929                    start: self.source.len(),
930                    end: self.source.len(),
931                }
932            },
933        }
934    }
935
936    fn source(&self, location: Location) -> &'s str {
937        location.extract(self.source)
938    }
939}
940
941pub fn parse<T>(source: &str) -> Result<Program<T>, Error>
942where
943    T: TypeSet,
944{
945    let tokens = crate::lexer::tokenize(source);
946    let mut stream = TokenStream::new(source, tokens);
947
948    Program::<T>::parse(&mut stream)
949}
950
951pub fn parse_expression<T>(source: &str) -> Result<Expression<T>, Error>
952where
953    T: TypeSet,
954{
955    let tokens = crate::lexer::tokenize(source);
956    let mut stream = TokenStream::new(source, tokens);
957
958    Expression::<T>::parse(&mut stream)
959}
960
#[cfg(test)]
mod tests {
    use super::*;

    /// Known escapes are decoded; an unknown escape yields `Err(6)`.
    #[test]
    fn test_unescape() {
        let decodable = [
            (r#"Hello\nWorld\t!"#, "Hello\nWorld\t!"),
            (r#"Hello\\World"#, "Hello\\World"),
        ];
        for (input, expected) in decodable {
            assert_eq!(unescape(input).unwrap(), expected);
        }

        // `\z` is not a valid escape sequence.
        assert_eq!(unescape(r#"Hello\zWorld"#), Err(6));
    }

    /// A hexadecimal literal that does not fit the 32-bit type set is rejected.
    #[test]
    fn test_out_of_range_literal() {
        let err = parse_expression::<TypeSet32>("0x100000000").expect_err("Parsing should fail");

        assert_eq!(
            "Parse error: Invalid hexadecimal integer literal",
            &*err.error
        );
    }

    /// Underscore-grouped digits form one literal spanning the whole input.
    #[test]
    fn test_grouped_numeric_literal() {
        let source = "1_000_000";

        let expression = parse_expression::<TypeSet32>(source).unwrap();
        let covered = expression.location().extract(source);

        assert_eq!(source, covered);
    }

    /// Literal parsing for strings, grouped integers and booleans, plus one
    /// input that must be rejected.
    #[test]
    fn test_parse_strings() {
        type LitVal = LiteralValue<TypeSet32>;

        let test_data = [
            (r#""hel_lo""#, Ok(LitVal::String(r#"hel_lo"#.to_string()))),
            ("1_000", Ok(LitVal::Integer(1000))),
            ("true", Ok(LitVal::Boolean(true))),
            ("false", Ok(LitVal::Boolean(false))),
            (
                "fal_se",
                Err("Parse error: Expected literal (number, string, or boolean)"),
            ),
        ];

        for (source, expected) in test_data {
            let mut stream = TokenStream::new(source, crate::lexer::tokenize(source));

            match (Literal::<TypeSet32>::parse(&mut stream), expected) {
                // Both sides succeeded: the values must agree variant by variant.
                (Ok(lit), Ok(expected)) => match (lit.value, expected) {
                    (LitVal::Integer(a), LitVal::Integer(b)) => assert_eq!(a, b),
                    (LitVal::Float(a), LitVal::Float(b)) => assert_eq!(a, b),
                    (LitVal::String(a), LitVal::String(b)) => assert_eq!(a, b),
                    (LitVal::Boolean(a), LitVal::Boolean(b)) => assert_eq!(a, b),
                    _ => panic!("Unexpected literal type"),
                },
                (Ok(_), Err(_)) => {
                    panic!("Expected error, but `{source}` was parsed successfully")
                }
                // Both sides failed: the messages must agree.
                (Err(err), Err(expected_err)) => assert_eq!(expected_err, err.to_string()),
                (Err(err), Ok(_)) => panic!(
                    "Expected parsing to succeed, but it failed with error: {}",
                    err
                ),
            }
        }
    }
}