chasm_rs/
compiler.rs

1use logos::{Logos, Span, SpannedIter};
2use std::collections::HashMap;
3use std::io::Write;
4use std::num::ParseFloatError;
5
6/// Tokens of the chasm language, based completely on the scanner of the original implementation:
7/// https://github.com/ColinEberhardt/chasm/blob/master/src/tokenizer.ts#L41
8/// There are some differences, but I hope that they are equivalent
9#[derive(Logos, PartialEq, Eq, Debug, Clone, Copy)]
10pub enum Token {
11    // this regex for number doesn't make a lot of sense, but it is like that in the original
12    #[regex(r"-?[.0-9]+([eE]-?[0-9][0-9])?")]
13    Number,
14    #[token("print")]
15    Print,
16    #[token("var")]
17    Var,
18    #[token("while")]
19    While,
20    #[token("endwhile")]
21    EndWhile,
22    #[token("if")]
23    If,
24    #[token("endif")]
25    EndIf,
26    #[token("else")]
27    Else,
28    #[token("proc")]
29    Proc,
30    #[token("endproc")]
31    EndProc,
32    #[token(",")]
33    Comma,
34    #[regex(r"(\+|-|\*|/|==|<|>|&&)")]
35    Operator,
36    #[regex(r"[a-zA-Z]+")]
37    Identifier,
38    #[token("=")]
39    Assignment,
40    #[token("(")]
41    LeftParen,
42    #[token(")")]
43    RightParen,
44    #[error]
45    #[regex(r"\s+", logos::skip)]
46    Error,
47    Eof,
48}
49impl Token {
50    /// Return a reference to a static value with the same variant that self
51    fn to_static(self) -> &'static Self {
52        match self {
53            Token::Number => &Token::Number,
54            Token::Print => &Token::Print,
55            Token::Var => &Token::Var,
56            Token::While => &Token::While,
57            Token::EndWhile => &Token::EndWhile,
58            Token::If => &Token::If,
59            Token::EndIf => &Token::EndIf,
60            Token::Else => &Token::Else,
61            Token::Proc => &Token::Proc,
62            Token::EndProc => &Token::EndProc,
63            Token::Comma => &Token::Comma,
64            Token::Operator => &Token::Operator,
65            Token::Identifier => &Token::Identifier,
66            Token::Assignment => &Token::Assignment,
67            Token::LeftParen => &Token::LeftParen,
68            Token::RightParen => &Token::RightParen,
69            Token::Error => &Token::Error,
70            Token::Eof => &Token::Eof,
71        }
72    }
73}
74impl std::fmt::Display for Token {
75    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
76        let s = match self {
77            Token::Number => "<number>",
78            Token::Print => "\"print\"",
79            Token::Var => "\"var\"",
80            Token::While => "\"while\"",
81            Token::EndWhile => "\"endwhile\"",
82            Token::If => "\"if\"",
83            Token::EndIf => "\"endif\"",
84            Token::Else => "\"else\"",
85            Token::Proc => "\"proc\"",
86            Token::EndProc => "\"endproc\"",
87            Token::Comma => "\",\"",
88            Token::Operator => "<operator>",
89            Token::Identifier => "<identifier>",
90            Token::Assignment => "\"=\"",
91            Token::LeftParen => "\"(\"",
92            Token::RightParen => "\")\"",
93            Token::Error => "<error>",
94            Token::Eof => "<eof>",
95        };
96        write!(f, "{}", s)
97    }
98}
99
100use crate::wasm_macro::wasm;
101
102/// Print a slice in the format "0, 1, 2 or 3"
103struct OrList<'a>(&'a [Token]);
104impl std::fmt::Display for OrList<'_> {
105    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
106        let len = self.0.len();
107        if len == 0 {
108            return write!(f, "nothing");
109        }
110        write!(f, "{}", self.0[0])?;
111        if len == 1 {
112            return Ok(());
113        }
114        for t in &self.0[1..len - 1] {
115            write!(f, ", {}", t)?;
116        }
117        write!(f, " or {}", self.0[len - 1])
118    }
119}
120
121/// A compilation error.
122///
123/// Contains a span and a reference to the source code to allow better error formatting.
124#[derive(Debug)]
125pub struct Error<'source> {
126    /// A reference to the source code.
127    pub source: &'source str,
128    /// The byte range of the source code this error is referencing.
129    pub span: Span,
130    /// The type of error.
131    pub kind: ErrorKind,
132}
133impl Error<'_> {
134    /// Get the line and column of the start of the Error's span. The fist line and column are 1.
135    pub fn get_line_column(&self) -> (usize, usize) {
136        self.source
137            .lines()
138            .enumerate()
139            .find_map(|(line, x)| {
140                let start = x.as_ptr() as usize - self.source.as_ptr() as usize;
141                let column = self.span.start - start;
142                (start..start + x.len())
143                    .contains(&self.span.start)
144                    .then(|| (line + 1, column + 1))
145            })
146            .unwrap_or((0, 0))
147    }
148}
149impl std::fmt::Display for Error<'_> {
150    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
151        let (line, column) = self.get_line_column();
152        write!(f, "error at {}:{}: ", line, column)?;
153
154        match &self.kind {
155            ErrorKind::UnexpectedToken { expected, received } => {
156                write!(
157                    f,
158                    "unexpected token value, expected {}, received {}",
159                    OrList(expected),
160                    received
161                )
162            }
163            ErrorKind::ParseFloatError(x) => {
164                write!(f, "failed to parse float number ({})", x)
165            }
166            ErrorKind::ArgumentNumberMismatch { expected, received } => {
167                write!(
168                    f,
169                    "number of arguments mismatch, expected {}, received {}",
170                    expected, received
171                )
172            }
173            ErrorKind::UnexpectedType { expected, received } => {
174                write!(
175                    f,
176                    "unexpected number type, expected {:?}, received {:?}",
177                    expected, received
178                )
179            }
180            ErrorKind::UndeclaredProc { name } => {
181                write!(f, "Undeclared procedural {:?}", name)
182            }
183        }
184    }
185}
186impl std::error::Error for Error<'_> {}
187
188/// The type of compilation error.
189#[derive(Debug, PartialEq, Eq)]
190pub enum ErrorKind {
191    /// The parser was expecting some set of tokens, but received a unexpected one.
192    UnexpectedToken {
193        /// Set of expected tokens
194        expected: &'static [Token],
195        /// Received token
196        received: Token,
197    },
198    /// The parsing of a number in string format to float has failed.
199    ParseFloatError(ParseFloatError),
200    /// There is a mismatch in the number of arguments in a procedure call and a procedure
201    /// definition.
202    ArgumentNumberMismatch {
203        /// Expected number of arguments
204        expected: u32,
205        /// Received number of arguments
206        received: u32,
207    },
208    /// Expected a Type or a pair of Type, but received a unexpected one.
209    UnexpectedType {
210        /// Expected type
211        expected: &'static [Type],
212        /// Received type
213        received: Vec<Type>,
214    },
215    /// There is a procedure call to a undefined procedure.
216    UndeclaredProc {
217        /// The name of the undefined procedure
218        name: String,
219    },
220}
221
222type Res<'s, T = ()> = Result<T, Error<'s>>;
223
224type LocalIdx = u32;
225type FuncIdx = u32;
226
227#[derive(Clone, Copy, Debug, PartialEq, Eq)]
228pub enum Type {
229    I32,
230    F32,
231}
232
233pub struct Procedure {
234    pub idx: FuncIdx,
235    pub num_param: u32,
236    pub code: Vec<u8>,
237}
238
239struct Context {
240    code: Vec<u8>,
241    symbols: HashMap<String, LocalIdx>,
242}
243impl Context {
244    fn local_index_for_symbol(&mut self, symbol: &str) -> LocalIdx {
245        if let Some(idx) = self.symbols.get(symbol) {
246            *idx
247        } else {
248            let len = self.symbols.len() as u32;
249            self.symbols.insert(symbol.to_string(), len);
250            len
251        }
252    }
253}
254
255/// Compile the source code to webassembly code.
256pub struct Parser<'source> {
257    source: &'source str,
258    lexer: SpannedIter<'source, Token>,
259    last: (Token, Span),
260    current: (Token, Span),
261    next: (Token, Span),
262    procedures: HashMap<String, Procedure>,
263}
264impl<'s> Parser<'s> {
265    pub fn parse(source: &'s str) -> Result<Vec<Procedure>, Error<'s>> {
266        let lexer = Token::lexer(source).spanned();
267        let mut parser = Self {
268            source,
269            last: (Token::Error, 0..0),
270            current: (Token::Error, 0..0),
271            next: (Token::Error, 0..0),
272            lexer,
273            procedures: HashMap::new(),
274        };
275        parser.eat_token();
276        parser.eat_token();
277
278        let main_proc = Procedure {
279            idx: 1,
280            num_param: 0,
281            code: Vec::new(),
282        };
283        parser.procedures.insert("main".to_string(), main_proc);
284
285        let mut ctx = Context {
286            code: Vec::new(),
287            symbols: HashMap::new(),
288        };
289
290        // compile statements
291        while parser.current.0 != Token::Eof {
292            parser.statement(&mut ctx)?;
293        }
294        parser.match_token(Token::Eof)?;
295        wasm!(&mut ctx.code, end);
296
297        let locals_index = ctx.code.len();
298
299        // write the vector of locals of the function
300        leb128::write::unsigned(&mut ctx.code, 1).unwrap();
301        leb128::write::unsigned(&mut ctx.code, ctx.symbols.len() as u64).unwrap();
302        wasm!(&mut ctx.code, f32);
303
304        // move locals to the start
305        let len = ctx.code.len();
306        ctx.code.rotate_right(len - locals_index);
307
308        parser.procedures.get_mut("main").unwrap().code = ctx.code;
309
310        let mut procedures: Vec<_> = Vec::with_capacity(parser.procedures.len());
311        for (name, p) in parser.procedures.into_iter() {
312            if p.code.is_empty() {
313                return Err(Error {
314                    source: parser.source,
315                    span: parser.current.1,
316                    kind: ErrorKind::UndeclaredProc { name },
317                });
318            }
319            procedures.push(p);
320        }
321        procedures.sort_by_key(|x| x.idx);
322        Ok(procedures)
323    }
324
325    fn eat_token(&mut self) {
326        self.last = self.current.clone();
327        self.current = self.next.clone();
328        self.next = self.lexer.next().unwrap_or_else(|| {
329            let end = self.source.len();
330            (Token::Eof, end..end)
331        });
332    }
333
334    fn match_token(&mut self, token: Token) -> Res<'s> {
335        if self.current.0 != token {
336            Err(Error {
337                source: self.source,
338                span: self.current.1.clone(),
339                kind: ErrorKind::UnexpectedToken {
340                    expected: std::slice::from_ref(token.to_static()),
341                    received: self.current.clone().0,
342                },
343            })
344        } else {
345            self.eat_token();
346            Ok(())
347        }
348    }
349
350    fn expect_type(&mut self, rec: Type, expec: Type, start: usize) -> Res<'s, Type> {
351        if rec != expec {
352            Err(Error {
353                source: self.source,
354                span: start..self.last.1.end,
355                kind: ErrorKind::UnexpectedType {
356                    expected: std::slice::from_ref(match expec {
357                        Type::I32 => &Type::I32,
358                        Type::F32 => &Type::F32,
359                    }),
360                    received: vec![rec],
361                },
362            })
363        } else {
364            Ok(rec)
365        }
366    }
367
368    fn procedure_from_symbol<'a>(
369        &'a mut self,
370        symbol: &str,
371        num_param: u32,
372    ) -> Res<'s, &'a mut Procedure> {
373        if self.procedures.get_mut(symbol).is_some() {
374            // need to get twice, because of the borrow checker
375            let proc = self.procedures.get_mut(symbol).unwrap();
376
377            // Err($a) ==>> Err(Error { source: self.source, span: self.current.1.clone(), kind: $a })
378            if proc.num_param != num_param {
379                return Err(Error {
380                    source: self.source,
381                    span: self.current.1.clone(),
382                    kind: ErrorKind::ArgumentNumberMismatch {
383                        expected: proc.num_param,
384                        received: num_param,
385                    },
386                });
387            }
388
389            Ok(proc)
390        } else {
391            let idx = (self.procedures.len() + 1) as FuncIdx;
392            let proc = Procedure {
393                idx,
394                num_param,
395                code: Vec::new(),
396            };
397
398            self.procedures.insert(symbol.to_string(), proc);
399            let proc = self.procedures.get_mut(symbol).unwrap();
400
401            Ok(proc)
402        }
403    }
404
405    // parse "<statement>*"
406    fn statement(&mut self, ctx: &mut Context) -> Res<'s> {
407        match self.current.0 {
408            Token::Print => self.print_statement(ctx)?,
409            Token::Var => self.variable_declaration(ctx)?,
410            Token::Identifier => match self.next.0 {
411                Token::Assignment => self.variable_assignment(ctx)?,
412                Token::LeftParen => self.proc_call(ctx)?,
413                _ => {
414                    return Err(Error {
415                        source: self.source,
416                        span: self.current.1.clone(),
417                        kind: ErrorKind::UnexpectedToken {
418                            expected: &[Token::Assignment, Token::LeftParen],
419                            received: self.next.clone().0,
420                        },
421                    })
422                }
423            },
424            Token::While => self.while_statement(ctx)?,
425            Token::If => self.if_statement(ctx)?,
426            Token::Proc => self.proc_statement()?,
427            _ => {
428                return Err(Error {
429                    source: self.source,
430                    span: self.current.1.clone(),
431                    kind: ErrorKind::UnexpectedToken {
432                        expected: &[Token::Print, Token::Var, Token::Identifier, Token::While],
433                        received: self.current.clone().0,
434                    },
435                })
436            }
437        }
438        Ok(())
439    }
440
441    /// Parse "print <expression>"
442    fn print_statement(&mut self, ctx: &mut Context) -> Res<'s> {
443        self.match_token(Token::Print)?;
444        let start = self.current.1.start;
445        let expr = self.expression(ctx)?;
446        self.expect_type(expr, Type::F32, start)?;
447        wasm!(&mut ctx.code, (call 0x0));
448        Ok(())
449    }
450
451    /// Parse "var <ident> = <expression>"
452    fn variable_declaration(&mut self, ctx: &mut Context) -> Res<'s> {
453        // the "var" keyword is purely aesthetic
454        self.match_token(Token::Var)?;
455
456        self.variable_assignment(ctx)
457    }
458
459    /// Parse "<ident> = <expression>"
460    fn variable_assignment(&mut self, ctx: &mut Context) -> Res<'s> {
461        let ident = self.current.clone();
462        self.match_token(Token::Identifier)?;
463        let idx = ctx.local_index_for_symbol(&self.source[ident.1]);
464
465        self.match_token(Token::Assignment)?;
466
467        let start = self.current.1.start;
468        let expr = self.expression(ctx)?;
469        self.expect_type(expr, Type::F32, start)?;
470        wasm!(&mut ctx.code, local.set idx);
471        Ok(())
472    }
473
474    /// Parse "<ident> ( <args>,* )"
475    fn proc_call(&mut self, ctx: &mut Context) -> Res<'s> {
476        let symbol = self.current.clone();
477        self.match_token(Token::Identifier)?;
478        let ident = &self.source[symbol.1];
479
480        self.match_token(Token::LeftParen)?;
481
482        // setpixel calls are hardcoded in the compiler
483        if ident == "setpixel" {
484            // yes, setpixel calls cause side effects in variables
485            let start = self.current.1.start;
486            let expr = self.expression(ctx)?;
487            self.expect_type(expr, Type::F32, start)?;
488            let x_idx = ctx.local_index_for_symbol("x");
489            wasm!(&mut ctx.code, local.set x_idx);
490
491            self.match_token(Token::Comma)?;
492
493            let start = self.current.1.start;
494            let expr = self.expression(ctx)?;
495            self.expect_type(expr, Type::F32, start)?;
496            let y_idx = ctx.local_index_for_symbol("y");
497            wasm!(&mut ctx.code, local.set y_idx);
498
499            self.match_token(Token::Comma)?;
500
501            let start = self.current.1.start;
502            let expr = self.expression(ctx)?;
503            self.expect_type(expr, Type::F32, start)?;
504            let color_idx = ctx.local_index_for_symbol("color");
505            wasm!(&mut ctx.code, local.set color_idx);
506
507            wasm!(&mut ctx.code,
508                // compute ((y*100) + x)
509                (local.get y_idx)
510                (f32.const 100.0)
511                (f32.mul)
512                (local.get x_idx)
513                (f32.add)
514                // convert to integer
515                (i32.trunc_f32_s)
516                // fetch color
517                (local.get color_idx)
518                (i32.trunc_f32_s)
519                // write to memory
520                (i32.store8 0 0)
521            );
522
523            self.match_token(Token::RightParen)?;
524        } else {
525            let mut n = 0;
526            while self.current.0 != Token::RightParen {
527                let start = self.current.1.start;
528                let expr = self.expression(ctx)?;
529                self.expect_type(expr, Type::F32, start)?;
530                n += 1;
531                if self.current.0 != Token::RightParen {
532                    self.match_token(Token::Comma)?;
533                } else {
534                    break;
535                }
536            }
537            self.match_token(Token::RightParen)?;
538
539            let idx = self.procedure_from_symbol(ident, n)?.idx;
540
541            wasm!(&mut ctx.code, call idx);
542        }
543        Ok(())
544    }
545
546    /// Parse "while <expression> <statements>* endwhile"
547    fn while_statement(&mut self, ctx: &mut Context) -> Res<'s> {
548        self.match_token(Token::While)?;
549
550        // start a block, and a loop block
551        wasm!(&mut ctx.code, (block) (loop));
552
553        // if the expression is false, jump to the end of the block
554        let start = self.current.1.start;
555        let expr = self.expression(ctx)?;
556        self.expect_type(expr, Type::I32, start)?;
557        wasm!(&mut ctx.code, (i32.eqz) (br_if 1));
558
559        while self.current.0 != Token::EndWhile {
560            self.statement(ctx)?;
561        }
562
563        self.match_token(Token::EndWhile)?;
564
565        // jump to the start of the loop block
566        wasm!(&mut ctx.code, (br 0) (end) (end));
567
568        Ok(())
569    }
570
571    /// Parse "if <expresion> <expression>* endif" or "if <expression> <expression>* else
572    /// <expression>* endif"
573    fn if_statement(&mut self, ctx: &mut Context) -> Res<'s> {
574        self.match_token(Token::If)?;
575
576        // condition
577        let start = self.current.1.start;
578        let expr = self.expression(ctx)?;
579        self.expect_type(expr, Type::I32, start)?;
580
581        wasm!(&mut ctx.code, if);
582
583        while !(self.current.0 == Token::EndIf || self.current.0 == Token::Else) {
584            self.statement(ctx)?;
585        }
586        if self.current.0 == Token::Else {
587            self.match_token(Token::Else)?;
588            wasm!(&mut ctx.code, else);
589            while self.current.0 != Token::EndIf {
590                self.statement(ctx)?;
591            }
592        }
593
594        self.match_token(Token::EndIf)?;
595        wasm!(&mut ctx.code, end);
596
597        Ok(())
598    }
599
600    /// Parse "proc <ident> ( <args>,* ) <statement>* endproc"
601    fn proc_statement(&mut self) -> Res<'s> {
602        self.match_token(Token::Proc)?;
603
604        let name = self.current.clone();
605        self.match_token(Token::Identifier)?;
606        let name = &self.source[name.1];
607
608        let mut args = Vec::new();
609
610        self.match_token(Token::LeftParen)?;
611        while self.current.0 != Token::RightParen {
612            let arg = self.current.clone();
613            self.match_token(Token::Identifier)?;
614
615            let arg = &self.source[arg.1];
616            args.push(arg.to_string());
617
618            if self.current.0 != Token::RightParen {
619                self.match_token(Token::Comma)?;
620            } else {
621                break;
622            }
623        }
624        self.match_token(Token::RightParen)?;
625
626        let num_param = args.len() as u32;
627        self.procedure_from_symbol(name, num_param)?;
628
629        let mut ctx = Context {
630            code: Vec::new(),
631            // function arguments are the starting locals index
632            symbols: args.into_iter().zip(0..).collect(),
633        };
634
635        while self.current.0 != Token::EndProc {
636            self.statement(&mut ctx)?;
637        }
638        self.match_token(Token::EndProc)?;
639        wasm!(&mut ctx.code, end);
640
641        let locals_index = ctx.code.len();
642
643        // write the vector of locals of the function
644        leb128::write::unsigned(&mut ctx.code, 1).unwrap();
645        leb128::write::unsigned(
646            &mut ctx.code,
647            // don't need to add locals for the argumentes
648            (ctx.symbols.len() - num_param as usize) as u64,
649        )
650        .unwrap();
651        wasm!(&mut ctx.code, f32);
652
653        // move locals to the start
654        let len = ctx.code.len();
655        ctx.code.rotate_right(len - locals_index);
656
657        self.procedure_from_symbol(name, num_param).unwrap().code = ctx.code;
658
659        Ok(())
660    }
661
662    /// Parse "<number>" or "<ident>" or "( <expression> <op> <expression> )"
663    fn expression(&mut self, ctx: &mut Context) -> Res<'s, Type> {
664        match self.current.0 {
665            Token::Number => {
666                let number = match self.source[self.current.1.clone()].parse::<f32>() {
667                    Ok(x) => x,
668                    Err(err) => {
669                        return Err(Error {
670                            source: self.source,
671                            span: self.current.1.clone(),
672                            kind: ErrorKind::ParseFloatError(err),
673                        })
674                    }
675                };
676                self.match_token(Token::Number)?;
677                wasm!(&mut ctx.code, (f32.const number));
678                Ok(Type::F32)
679            }
680            Token::Identifier => {
681                let ident = self.current.clone();
682                self.match_token(Token::Identifier)?;
683
684                let symbol = &self.source[ident.1];
685                let idx = ctx.local_index_for_symbol(symbol);
686
687                wasm!(&mut ctx.code, local.get idx);
688                Ok(Type::F32)
689            }
690            Token::LeftParen => {
691                self.match_token(Token::LeftParen)?;
692
693                // left
694                let type_a = self.expression(ctx)?;
695
696                let op_token = self.current.clone();
697                self.match_token(Token::Operator)?;
698                let op = &self.source[op_token.1.clone()];
699
700                // right
701                let type_b = self.expression(ctx)?;
702
703                // op
704                match op {
705                    "+" | "-" | "*" | "/" | "<" | ">" | "==" => {
706                        if type_a != Type::F32 || type_b != Type::F32 {
707                            return Err(Error {
708                                source: self.source,
709                                span: op_token.1,
710                                kind: ErrorKind::UnexpectedType {
711                                    expected: &[Type::F32, Type::F32],
712                                    received: vec![type_a, type_b],
713                                },
714                            });
715                        }
716                    }
717                    "&&" => {
718                        if type_a != Type::I32 || type_b != Type::I32 {
719                            return Err(Error {
720                                source: self.source,
721                                span: op_token.1,
722                                kind: ErrorKind::UnexpectedType {
723                                    expected: &[Type::I32, Type::I32],
724                                    received: vec![type_a, type_b],
725                                },
726                            });
727                        }
728                    }
729                    _ => unreachable!("I already match the token operator"),
730                }
731                match op {
732                    "+" => wasm!(&mut ctx.code, f32.add),
733                    "-" => wasm!(&mut ctx.code, f32.sub),
734                    "*" => wasm!(&mut ctx.code, f32.mul),
735                    "/" => wasm!(&mut ctx.code, f32.div),
736                    "==" => wasm!(&mut ctx.code, f32.eq),
737                    "<" => wasm!(&mut ctx.code, f32.lt),
738                    ">" => wasm!(&mut ctx.code, f32.gt),
739                    "&&" => wasm!(&mut ctx.code, i32.and),
740                    _ => unreachable!("I already match the token operator"),
741                }
742
743                self.match_token(Token::RightParen)?;
744
745                match op {
746                    "+" | "-" | "*" | "/" => Ok(Type::F32),
747                    "==" | "<" | ">" | "&&" => Ok(Type::I32),
748                    _ => unreachable!("I already match the token operator"),
749                }
750            }
751            _ => Err(Error {
752                source: self.source,
753                span: self.current.1.clone(),
754                kind: ErrorKind::UnexpectedToken {
755                    expected: &[Token::Number, Token::LeftParen],
756                    received: self.current.clone().0,
757                },
758            }),
759        }
760    }
761}