Skip to main content

ling/parser/
mod.rs

1// src/parser/mod.rs
2pub mod ast;
3pub mod grammar;
4
5use crate::lexer::Token;
6use ast::*;
7
8pub fn parse(source: &str) -> Result<Program, String> {
9    let mut lex = crate::lexer::Lexer::new(source);
10    let mut tokens = Vec::new();
11    while let Some(tok) = lex.next_token() {
12        tokens.push(tok);
13    }
14    tokens.push(Token::Eof);
15    parse_tokens(tokens)
16}
17
18pub fn parse_tokens(tokens: Vec<Token>) -> Result<Program, String> {
19    Parser::new(tokens).parse_program()
20}
21
22// ─── Parser ──────────────────────────────────────────────────────────────────
23
/// Parser state: an owned token list plus a cursor into it.
struct Parser {
    /// Tokens to parse, in the order they were supplied.
    tokens: Vec<Token>,
    /// Index into `tokens` of the next token to be consumed.
    pos: usize,
}
28
29impl Parser {
30    fn new(tokens: Vec<Token>) -> Self {
31        Self { tokens, pos: 0 }
32    }
33
34    fn peek(&self) -> &Token {
35        self.tokens.get(self.pos).unwrap_or(&Token::Eof)
36    }
37
38    #[allow(dead_code)]
39    fn peek2(&self) -> &Token {
40        self.tokens.get(self.pos + 1).unwrap_or(&Token::Eof)
41    }
42
43    fn advance(&mut self) -> Token {
44        let tok = self.tokens.get(self.pos).cloned().unwrap_or(Token::Eof);
45        if self.pos < self.tokens.len() {
46            self.pos += 1;
47        }
48        tok
49    }
50
51    fn expect(&mut self, expected: &Token) -> Result<(), String> {
52        let tok = self.advance();
53        if &tok == expected {
54            Ok(())
55        } else {
56            Err(format!("expected {:?}, got {:?}", expected, tok))
57        }
58    }
59
60    /// Parse a name that can be a keyword used as an identifier.
61    fn parse_name(&mut self) -> Result<String, String> {
62        match self.advance() {
63            Token::Ident(s) => Ok(s),
64            // Allow all keywords to serve as bind names (contextual)
65            tok => Ok(token_to_name(&tok)
66                .ok_or_else(|| format!("expected identifier, got {:?}", tok))?
67                .to_string()),
68        }
69    }
70
71    // ─── Top-level ───────────────────────────────────────────────────────────
72
73    fn parse_program(&mut self) -> Result<Program, String> {
74        let mut items = Vec::new();
75        while !matches!(self.peek(), Token::Eof) {
76            items.push(self.parse_item()?);
77        }
78        Ok(Program { items })
79    }
80
81    fn parse_item(&mut self) -> Result<Item, String> {
82        let is_async = if matches!(self.peek(), Token::Async) {
83            self.advance();
84            true
85        } else {
86            false
87        };
88
89        match self.peek().clone() {
90            Token::Bind => {
91                self.advance();
92                let name = self.parse_name()?;
93                self.expect(&Token::Eq)?;
94                let expr = self.parse_expr()?;
95                Ok(Item::Bind(name, expr))
96            }
97            Token::Fn => {
98                self.parse_fn_item(is_async)
99            }
100            Token::Mod => {
101                self.advance();
102                let name = self.parse_name()?;
103                self.expect(&Token::LBrace)?;
104                let mut body = Vec::new();
105                while !matches!(self.peek(), Token::RBrace | Token::Eof) {
106                    body.push(self.parse_item()?);
107                }
108                self.expect(&Token::RBrace)?;
109                Ok(Item::Mod(name, body))
110            }
111            Token::Type => {
112                self.advance();
113                let name = self.parse_name()?;
114                // Skip optional generic params
115                self.skip_generics();
116                self.expect(&Token::As)?;
117                let ty = self.parse_type_str();
118                Ok(Item::TypeAlias(name, ty))
119            }
120            Token::Use => {
121                self.advance();
122                // Expect a string path: use "path/to/module"
123                let path = match self.advance() {
124                    Token::String(s) => s,
125                    tok => return Err(format!("use: expected string path, got {:?}", tok)),
126                };
127                // Optional `as name` alias
128                let alias = if matches!(self.peek(), Token::As) {
129                    self.advance();
130                    Some(self.parse_name()?)
131                } else {
132                    None
133                };
134                Ok(Item::Use { path, alias })
135            }
136            tok => Err(format!("unexpected token at top level: {:?}", tok)),
137        }
138    }
139
140    fn parse_fn_item(&mut self, is_async: bool) -> Result<Item, String> {
141        self.advance(); // consume `fn`
142        let name = self.parse_name()?;
143        self.skip_generics(); // ignore `<T, U>`
144        self.expect(&Token::LParen)?;
145        let params = self.parse_params()?;
146        self.expect(&Token::RParen)?;
147        // Optional return type: `-> Type`
148        if matches!(self.peek(), Token::Arrow) {
149            self.advance();
150            self.parse_type_str();
151        }
152        // Optional `where` clause
153        if matches!(self.peek(), Token::Where) {
154            self.advance();
155            while !matches!(self.peek(), Token::LBrace | Token::Eof) {
156                self.advance();
157            }
158        }
159        self.expect(&Token::LBrace)?;
160        let body = self.parse_block()?;
161        self.expect(&Token::RBrace)?;
162        Ok(Item::Fn(FnDef { name, is_async, params, body }))
163    }
164
165    fn parse_params(&mut self) -> Result<Vec<String>, String> {
166        let mut params = Vec::new();
167        while !matches!(self.peek(), Token::RParen | Token::Eof) {
168            // Skip leading & or own/lend
169            while matches!(self.peek(), Token::Ampersand | Token::Own | Token::Lend) {
170                self.advance();
171            }
172            let name = self.parse_name()?;
173            params.push(name);
174            // Skip `: Type`
175            if matches!(self.peek(), Token::Colon) {
176                self.advance();
177                self.parse_type_str();
178            }
179            if matches!(self.peek(), Token::Comma) {
180                self.advance();
181            }
182        }
183        Ok(params)
184    }
185
186    /// Eat a type expression (until `,`, `)`, `{`, `where`, `>`) — ignored at runtime.
187    fn parse_type_str(&mut self) -> String {
188        let mut depth = 0usize;
189        let mut result = String::new();
190        loop {
191            match self.peek() {
192                Token::Eof => break,
193                Token::Lt => { depth += 1; result.push('<'); self.advance(); }
194                Token::Gt if depth > 0 => { depth -= 1; result.push('>'); self.advance(); }
195                Token::LBrace | Token::Where if depth == 0 => break,
196                Token::RParen | Token::Comma | Token::Semicolon if depth == 0 => break,
197                Token::Arrow if depth == 0 => break,
198                tok => { result.push_str(&format!("{:?}", tok)); self.advance(); }
199            }
200        }
201        result
202    }
203
204    fn skip_generics(&mut self) {
205        if matches!(self.peek(), Token::Lt) {
206            let mut depth = 0;
207            loop {
208                match self.advance() {
209                    Token::Lt => depth += 1,
210                    Token::Gt => { depth -= 1; if depth == 0 { break; } }
211                    Token::Eof => break,
212                    _ => {}
213                }
214            }
215        }
216    }
217
218    // ─── Blocks ──────────────────────────────────────────────────────────────
219
220    fn parse_block(&mut self) -> Result<Vec<Stmt>, String> {
221        let mut stmts = Vec::new();
222        while !matches!(self.peek(), Token::RBrace | Token::Eof) {
223            stmts.push(self.parse_stmt()?);
224            // Optional trailing semicolon
225            if matches!(self.peek(), Token::Semicolon) {
226                self.advance();
227            }
228        }
229        Ok(stmts)
230    }
231
232    fn parse_stmt(&mut self) -> Result<Stmt, String> {
233        match self.peek().clone() {
234            Token::Bind => {
235                self.advance();
236                let name = self.parse_name()?;
237                self.expect(&Token::Eq)?;
238                let expr = self.parse_expr()?;
239                Ok(Stmt::Bind(name, expr))
240            }
241            Token::Return => {
242                self.advance();
243                if matches!(self.peek(), Token::Semicolon | Token::RBrace) {
244                    Ok(Stmt::Return(Expr::Unit))
245                } else {
246                    Ok(Stmt::Return(self.parse_expr()?))
247                }
248            }
249            _ => Ok(Stmt::Expr(self.parse_expr()?)),
250        }
251    }
252
253    // ─── Expressions ─────────────────────────────────────────────────────────
254
255    fn parse_expr(&mut self) -> Result<Expr, String> {
256        self.parse_or_expr()
257    }
258
259    fn parse_or_expr(&mut self) -> Result<Expr, String> {
260        let mut left = self.parse_and_expr()?;
261        while matches!(self.peek(), Token::Or) {
262            self.advance();
263            let right = self.parse_and_expr()?;
264            left = Expr::BinOp(BinOp::Or, Box::new(left), Box::new(right));
265        }
266        Ok(left)
267    }
268
269    fn parse_and_expr(&mut self) -> Result<Expr, String> {
270        let mut left = self.parse_cmp_expr()?;
271        while matches!(self.peek(), Token::And) {
272            self.advance();
273            let right = self.parse_cmp_expr()?;
274            left = Expr::BinOp(BinOp::And, Box::new(left), Box::new(right));
275        }
276        Ok(left)
277    }
278
279    fn parse_cmp_expr(&mut self) -> Result<Expr, String> {
280        let mut left = self.parse_add_expr()?;
281        loop {
282            let op = match self.peek() {
283                Token::EqEq => BinOp::Eq,
284                Token::Ne   => BinOp::Ne,
285                Token::Lt   => BinOp::Lt,
286                Token::Gt   => BinOp::Gt,
287                Token::Le   => BinOp::Le,
288                Token::Ge   => BinOp::Ge,
289                _ => break,
290            };
291            self.advance();
292            let right = self.parse_add_expr()?;
293            left = Expr::BinOp(op, Box::new(left), Box::new(right));
294        }
295        Ok(left)
296    }
297
298    fn parse_add_expr(&mut self) -> Result<Expr, String> {
299        let mut left = self.parse_mul_expr()?;
300        loop {
301            let op = match self.peek() {
302                Token::Plus  => BinOp::Add,
303                Token::Minus => BinOp::Sub,
304                _ => break,
305            };
306            self.advance();
307            let right = self.parse_mul_expr()?;
308            left = Expr::BinOp(op, Box::new(left), Box::new(right));
309        }
310        Ok(left)
311    }
312
313    fn parse_mul_expr(&mut self) -> Result<Expr, String> {
314        let mut left = self.parse_unary_expr()?;
315        loop {
316            let op = match self.peek() {
317                Token::Star    => BinOp::Mul,
318                Token::Slash   => BinOp::Div,
319                Token::Percent => BinOp::Rem,
320                _ => break,
321            };
322            self.advance();
323            let right = self.parse_unary_expr()?;
324            left = Expr::BinOp(op, Box::new(left), Box::new(right));
325        }
326        Ok(left)
327    }
328
329    fn parse_unary_expr(&mut self) -> Result<Expr, String> {
330        match self.peek().clone() {
331            Token::Ampersand => { self.advance(); Ok(Expr::Ref(Box::new(self.parse_postfix_expr()?))) }
332            Token::Not       => { self.advance(); Ok(Expr::BinOp(BinOp::Eq, Box::new(self.parse_postfix_expr()?), Box::new(Expr::Bool(false)))) }
333            Token::Minus     => { self.advance(); Ok(Expr::BinOp(BinOp::Sub, Box::new(Expr::Number(0.0)), Box::new(self.parse_postfix_expr()?))) }
334            Token::Wait      => { self.advance(); Ok(Expr::Await(Box::new(self.parse_postfix_expr()?))) }
335            // Ownership modifiers are hints; just evaluate the inner expression
336            Token::Own | Token::Lend | Token::Share | Token::Move | Token::Copy => {
337                self.advance();
338                self.parse_unary_expr()
339            }
340            _ => self.parse_postfix_expr(),
341        }
342    }
343
344    /// Parse suffix operations: calls, method calls, indexing, `..`, path (::)
345    fn parse_postfix_expr(&mut self) -> Result<Expr, String> {
346        let mut base = self.parse_primary()?;
347
348        loop {
349            match self.peek().clone() {
350                // `.method(args)` or `.field`
351                Token::Dot => {
352                    self.advance();
353                    let method = self.parse_name()?;
354                    if matches!(self.peek(), Token::LParen) {
355                        self.advance();
356                        let args = self.parse_call_args()?;
357                        self.expect(&Token::RParen)?;
358                        base = Expr::MethodCall {
359                            receiver: Box::new(base),
360                            method,
361                            args,
362                        };
363                    } else {
364                        // field access — treat as method call with no args
365                        base = Expr::MethodCall {
366                            receiver: Box::new(base),
367                            method,
368                            args: Vec::new(),
369                        };
370                    }
371                }
372                // `::ident` — extend path
373                Token::ColonColon => {
374                    self.advance();
375                    let segment = self.parse_name()?;
376                    // Collect any further `::` segments
377                    base = match base {
378                        Expr::Path(mut segs) => { segs.push(segment); Expr::Path(segs) }
379                        Expr::Ident(s)       => Expr::Path(vec![s, segment]),
380                        other => Expr::Path(vec![format!("{:?}", other), segment]),
381                    };
382                }
383                // `(args)` — call
384                Token::LParen => {
385                    self.advance();
386                    let args = self.parse_call_args()?;
387                    self.expect(&Token::RParen)?;
388                    base = Expr::Call(Box::new(base), args);
389                }
390                // `[idx]`
391                Token::LBracket => {
392                    self.advance();
393                    let idx = self.parse_expr()?;
394                    self.expect(&Token::RBracket)?;
395                    base = Expr::Index(Box::new(base), Box::new(idx));
396                }
397                // `..hi` — range
398                Token::DotDot => {
399                    self.advance();
400                    let hi = self.parse_primary()?;
401                    base = Expr::Range(Box::new(base), Box::new(hi));
402                }
403                _ => break,
404            }
405        }
406        Ok(base)
407    }
408
409    fn parse_call_args(&mut self) -> Result<Vec<Expr>, String> {
410        let mut args = Vec::new();
411        while !matches!(self.peek(), Token::RParen | Token::Eof) {
412            args.push(self.parse_expr()?);
413            if matches!(self.peek(), Token::Comma) { self.advance(); }
414        }
415        Ok(args)
416    }
417
418    fn parse_primary(&mut self) -> Result<Expr, String> {
419        match self.peek().clone() {
420            // Literals
421            Token::String(s) => { self.advance(); Ok(Expr::Str(s)) }
422            Token::Number(n) => { self.advance(); Ok(Expr::Number(n.parse().unwrap_or(0.0))) }
423            Token::Bool(b)   => { self.advance(); Ok(Expr::Bool(b)) }
424
425            // Keywords that start expressions
426            Token::Do => {
427                self.advance();
428                self.expect(&Token::LBrace)?;
429                let stmts = self.parse_block()?;
430                self.expect(&Token::RBrace)?;
431                Ok(Expr::Do(stmts))
432            }
433            Token::LBrace => {
434                self.advance();
435                let stmts = self.parse_block()?;
436                self.expect(&Token::RBrace)?;
437                Ok(Expr::Do(stmts))
438            }
439            Token::If => self.parse_if_expr(),
440            Token::For => self.parse_for_expr(),
441            Token::While => self.parse_while_expr(),
442            Token::Match => self.parse_match_expr(),
443            Token::Return => {
444                self.advance();
445                let val = if matches!(self.peek(), Token::Semicolon | Token::RBrace | Token::Eof) {
446                    Expr::Unit
447                } else {
448                    self.parse_expr()?
449                };
450                Ok(Expr::Do(vec![Stmt::Return(val)]))
451            }
452
453            // Array literal
454            Token::LBracket => {
455                self.advance();
456                let mut elems = Vec::new();
457                while !matches!(self.peek(), Token::RBracket | Token::Eof) {
458                    elems.push(self.parse_expr()?);
459                    if matches!(self.peek(), Token::Comma) { self.advance(); }
460                }
461                self.expect(&Token::RBracket)?;
462                Ok(Expr::Array(elems))
463            }
464
465            // Closure `|| expr` or `|params| expr`
466            Token::Or => {
467                self.advance(); // first |
468                let mut params = Vec::new();
469                // If next is not |, parse params
470                if !matches!(self.peek(), Token::Or) {
471                    while !matches!(self.peek(), Token::Or | Token::Eof) {
472                        params.push(self.parse_name()?);
473                        if matches!(self.peek(), Token::Comma) { self.advance(); }
474                    }
475                }
476                self.advance(); // closing |
477                let body = self.parse_expr()?;
478                Ok(Expr::Closure(params, Box::new(body)))
479            }
480
481            // Grouped expression
482            Token::LParen => {
483                self.advance();
484                if matches!(self.peek(), Token::RParen) {
485                    self.advance();
486                    return Ok(Expr::Unit);
487                }
488                let e = self.parse_expr()?;
489                self.expect(&Token::RParen)?;
490                Ok(e)
491            }
492
493            // Async block
494            Token::Async => {
495                self.advance();
496                let inner = self.parse_expr()?;
497                Ok(inner) // async is a hint; we just execute synchronously
498            }
499
500            // Identifier — could start a path
501            Token::Ident(name) => {
502                self.advance();
503                Ok(Expr::Ident(name))
504            }
505
506            // Allow keywords as expression-position identifiers (e.g. 移动/move)
507            tok => {
508                if let Some(name) = token_to_name(&tok) {
509                    self.advance();
510                    Ok(Expr::Ident(name.to_string()))
511                } else {
512                    Err(format!("unexpected token in expression: {:?}", tok))
513                }
514            }
515        }
516    }
517
518    // ─── if / for / match ────────────────────────────────────────────────────
519
520    fn parse_if_expr(&mut self) -> Result<Expr, String> {
521        self.advance(); // consume `if`
522        let cond = self.parse_cmp_expr()?;
523        self.expect(&Token::LBrace)?;
524        let then = self.parse_block()?;
525        self.expect(&Token::RBrace)?;
526
527        let mut elseifs = Vec::new();
528        let mut else_body = None;
529
530        while matches!(self.peek(), Token::Else) {
531            self.advance(); // consume `else`
532            if matches!(self.peek(), Token::If) {
533                self.advance(); // consume `if`
534                let ei_cond = self.parse_cmp_expr()?;
535                self.expect(&Token::LBrace)?;
536                let ei_body = self.parse_block()?;
537                self.expect(&Token::RBrace)?;
538                elseifs.push((ei_cond, ei_body));
539            } else {
540                self.expect(&Token::LBrace)?;
541                else_body = Some(self.parse_block()?);
542                self.expect(&Token::RBrace)?;
543                break;
544            }
545        }
546
547        Ok(Expr::If { cond: Box::new(cond), then, elseifs, else_body })
548    }
549
550    fn parse_while_expr(&mut self) -> Result<Expr, String> {
551        self.advance(); // consume `while` / `ขณะที่`
552        let cond = self.parse_expr()?;
553        self.expect(&Token::LBrace)?;
554        let body = self.parse_block()?;
555        self.expect(&Token::RBrace)?;
556        Ok(Expr::While { cond: Box::new(cond), body })
557    }
558
559    fn parse_for_expr(&mut self) -> Result<Expr, String> {
560        self.advance(); // consume `for` / `历`
561        let var = self.parse_name()?;
562        self.expect(&Token::In)?;
563        let iter = self.parse_postfix_expr()?;
564        // Handle `0..n` already parsed as Range in postfix
565        self.expect(&Token::LBrace)?;
566        let body = self.parse_block()?;
567        self.expect(&Token::RBrace)?;
568        Ok(Expr::For { var, iter: Box::new(iter), body })
569    }
570
571    fn parse_match_expr(&mut self) -> Result<Expr, String> {
572        self.advance(); // consume `match` / `配`
573        let subject = self.parse_postfix_expr()?;
574        self.expect(&Token::LBrace)?;
575        let mut arms = Vec::new();
576        while !matches!(self.peek(), Token::RBrace | Token::Eof) {
577            let pattern = self.parse_pattern()?;
578            self.expect(&Token::FatArrow)?;
579            let body = self.parse_expr()?;
580            // Optional trailing comma
581            if matches!(self.peek(), Token::Comma) { self.advance(); }
582            arms.push(MatchArm { pattern, body });
583        }
584        self.expect(&Token::RBrace)?;
585        Ok(Expr::Match(Box::new(subject), arms))
586    }
587
588    fn parse_pattern(&mut self) -> Result<Pattern, String> {
589        match self.peek().clone() {
590            Token::Ident(s) if s == "_" => { self.advance(); Ok(Pattern::Wildcard) }
591            Token::String(s) => { self.advance(); Ok(Pattern::Str(s)) }
592            Token::Number(n) => { self.advance(); Ok(Pattern::Number(n.parse().unwrap_or(0.0))) }
593            Token::Bool(b)   => { self.advance(); Ok(Pattern::Bool(b)) }
594            // Ok(inner), Bad(inner), 好(inner), 坏(inner)
595            Token::Ok | Token::Bad => {
596                let ctor_tok = self.advance();
597                let ctor = match ctor_tok {
598                    Token::Ok  => "ok".to_string(),
599                    Token::Bad => "bad".to_string(),
600                    _ => unreachable!(),
601                };
602                if matches!(self.peek(), Token::LParen) {
603                    self.advance();
604                    let inner = self.parse_pattern()?;
605                    self.expect(&Token::RParen)?;
606                    Ok(Pattern::Constructor(ctor, Some(Box::new(inner))))
607                } else {
608                    Ok(Pattern::Constructor(ctor, None))
609                }
610            }
611            _ => {
612                let name = self.parse_name()?;
613                if matches!(self.peek(), Token::LParen) {
614                    self.advance();
615                    let inner = self.parse_pattern()?;
616                    self.expect(&Token::RParen)?;
617                    Ok(Pattern::Constructor(name, Some(Box::new(inner))))
618                } else if name == "_" {
619                    Ok(Pattern::Wildcard)
620                } else {
621                    Ok(Pattern::Ident(name))
622                }
623            }
624        }
625    }
626}
627
628// ─── Helper ──────────────────────────────────────────────────────────────────
629
630/// Convert a keyword token to its string name for use as an identifier.
631fn token_to_name(tok: &Token) -> Option<&'static str> {
632    match tok {
633        Token::Bind   => Some("bind"),   Token::Do     => Some("do"),
634        Token::Fn     => Some("fn"),     Token::Mod    => Some("mod"),
635        Token::Type   => Some("type"),   Token::If     => Some("if"),
636        Token::Else   => Some("else"),   Token::While  => Some("while"),
637        Token::For    => Some("for"),    Token::In     => Some("in"),
638        Token::Match  => Some("match"),  Token::Return => Some("return"),
639        Token::Own    => Some("own"),    Token::Lend   => Some("lend"),
640        Token::Share  => Some("share"),  Token::Move   => Some("move"),
641        Token::Copy   => Some("copy"),   Token::Async  => Some("async"),
642        Token::Wait   => Some("wait"),   Token::As     => Some("as"),
643        Token::Where  => Some("where"),  Token::Post   => Some("post"),
644        Token::Give   => Some("give"),   Token::Fit    => Some("fit"),
645        Token::Form   => Some("form"),   Token::Choose => Some("choose"),
646        Token::Can    => Some("can"),    Token::Change => Some("change"),
647        Token::Stop   => Some("stop"),   Token::Again  => Some("again"),
648        Token::Try    => Some("try"),    Token::Sure   => Some("sure"),
649        Token::Maybe  => Some("maybe"),  Token::Pure   => Some("pure"),
650        Token::Spawn  => Some("spawn"),  Token::Ok     => Some("ok"),
651        Token::Bad    => Some("bad"),    Token::None   => Some("none"),
652        Token::Use    => Some("use"),
653        _ => None,
654    }
655}