koi_parser/
token_tree.rs

1use std::{fmt::Display, ops::Range};
2
3use crate::lexer::{Token, lexer};
4
5/// A token tree node - either a single token or a delimited group
6#[derive(Debug, PartialEq)]
7pub enum TokenTree {
8    /// A single token
9    Token(TokenTreeToken),
10    /// A delimited group of token trees
11    Block(TokenTreeBlock),
12}
13
14#[derive(Debug, PartialEq)]
15pub struct TokenTreeToken {
16    pub token: Token,
17    pub span: Range<usize>,
18}
19
20#[derive(Debug, PartialEq)]
21pub struct TokenTreeBlock {
22    pub delimiter: Block,
23    pub open_span: Range<usize>,
24    pub trees: Vec<TokenTree>,
25    pub close_span: Range<usize>,
26}
27
28impl TokenTreeBlock {
29    fn span(&self) -> Range<usize> {
30        Range {
31            start: self.open_span.start,
32            end: self.open_span.end,
33        }
34    }
35}
36
37impl TokenTree {
38    pub fn token(&self) -> Option<&TokenTreeToken> {
39        match self {
40            TokenTree::Token(token) => Some(token),
41            TokenTree::Block(..) => None,
42        }
43    }
44
45    pub fn block(&self) -> Option<&TokenTreeBlock> {
46        match self {
47            TokenTree::Token(..) => None,
48            TokenTree::Block(block) => Some(block),
49        }
50    }
51
52    pub fn paren_block(&self) -> Option<&TokenTreeBlock> {
53        match self {
54            TokenTree::Token(..) => None,
55            TokenTree::Block(block) => match block.delimiter {
56                Block::Parenthesis => Some(block),
57                Block::Brace => None,
58                Block::Bracket => None,
59            },
60        }
61    }
62
63    pub fn brace_block(&self) -> Option<&TokenTreeBlock> {
64        match self {
65            TokenTree::Token(..) => None,
66            TokenTree::Block(block) => match block.delimiter {
67                Block::Parenthesis => None,
68                Block::Brace => Some(block),
69                Block::Bracket => None,
70            },
71        }
72    }
73
74    pub fn bracket_block(&self) -> Option<&TokenTreeBlock> {
75        match self {
76            TokenTree::Token(..) => None,
77            TokenTree::Block(block) => match block.delimiter {
78                Block::Parenthesis => None,
79                Block::Brace => None,
80                Block::Bracket => Some(block),
81            },
82        }
83    }
84
85    pub fn block_of(&self, delimiter: Block) -> Option<&TokenTreeBlock> {
86        match self {
87            TokenTree::Token(..) => None,
88            TokenTree::Block(block) => {
89                if block.delimiter == delimiter {
90                    return Some(block);
91                } else {
92                    return None;
93                }
94            }
95        }
96    }
97
98    pub fn span(&self) -> Range<usize> {
99        match self {
100            TokenTree::Token(token_tree_token) => token_tree_token.span.clone(),
101            TokenTree::Block(token_tree_block) => token_tree_block.span(),
102        }
103    }
104}
105
106#[derive(Debug, Clone, Copy, PartialEq, Eq)]
107pub enum Block {
108    Parenthesis, // ( )
109    Brace,       // { }
110    Bracket,     // [ ]
111}
112
113impl Display for Block {
114    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
115        match self {
116            Block::Parenthesis => write!(f, "( )"),
117            Block::Brace => write!(f, "{{ }}"),
118            Block::Bracket => write!(f, "[ ]"),
119        }
120    }
121}
122
123impl Block {
124    /// This delimiter as a opening token
125    pub fn opening_token(&self) -> Token {
126        match self {
127            Block::Parenthesis => Token::LParen,
128            Block::Brace => Token::LBrace,
129            Block::Bracket => Token::LBracket,
130        }
131    }
132
133    /// This delimiter as a closing token
134    pub fn closing_token(&self) -> Token {
135        match self {
136            Block::Parenthesis => Token::RParen,
137            Block::Brace => Token::RBrace,
138            Block::Bracket => Token::RBracket,
139        }
140    }
141}
142
143#[derive(Debug, Clone, PartialEq)]
144pub enum ParseError {
145    /// EOF reached before closing delimiter
146    UnexpectedEOF {
147        expected: Block,
148        expected_span: Range<usize>,
149    },
150    /// A closing delimiter found before any opening delimiter
151    UnexpectedClosing {
152        token: Block,
153        span: Range<usize>,
154    },
155    /// Currently looking for closing delimiter but found another closing or opening delimiter
156    MismatchedDelimiter {
157        expected: Block,
158        expected_span: Range<usize>,
159        found: Block,
160        found_span: Range<usize>,
161    },
162}
163
164pub struct TokenTreeParser {
165    tokens: Vec<(Token, Range<usize>)>,
166    pos: usize,
167}
168
169impl TokenTreeParser {
170    pub fn new(tokens: Vec<(Token, Range<usize>)>) -> Self {
171        Self { tokens, pos: 0 }
172    }
173
174    /// Parse the entire token stream into a flat list of token trees
175    pub fn parse(&mut self) -> Result<Vec<TokenTree>, ParseError> {
176        let mut trees = Vec::new();
177
178        while !self.is_at_end() {
179            trees.push(self.parse_tree()?);
180        }
181
182        Ok(trees)
183    }
184
185    /// Parse a single token tree
186    fn parse_tree(&mut self) -> Result<TokenTree, ParseError> {
187        let (token, span) = self.current();
188
189        match token {
190            Token::LParen => self.parse_delimited(Block::Parenthesis),
191            Token::LBrace => self.parse_delimited(Block::Brace),
192            Token::RParen => Err(ParseError::UnexpectedClosing {
193                token: Block::Parenthesis,
194                span: span.clone(),
195            }),
196            Token::RBrace => Err(ParseError::UnexpectedClosing {
197                token: Block::Brace,
198                span: span.clone(),
199            }),
200            Token::RBracket => Err(ParseError::UnexpectedClosing {
201                token: Block::Bracket,
202                span: span.clone(),
203            }),
204            Token::EOF => unreachable!(),
205            _ => {
206                let tree = TokenTree::Token(TokenTreeToken {
207                    token: token.clone(),
208                    span: span.clone(),
209                });
210                self.advance();
211                Ok(tree)
212            }
213        }
214    }
215
216    /// Parse a delimited group
217    fn parse_delimited(&mut self, delimiter: Block) -> Result<TokenTree, ParseError> {
218        let open_span = self.current().1.clone();
219        self.advance(); // consume opening delimiter
220        let closing_token = delimiter.closing_token();
221        let mut trees = Vec::new();
222
223        loop {
224            let (token, span) = self.current();
225            if token == &closing_token {
226                let close_span = span.clone();
227                self.advance(); // consume closing delimiter
228                return Ok(TokenTree::Block(TokenTreeBlock {
229                    delimiter,
230                    open_span,
231                    trees,
232                    close_span,
233                }));
234            }
235
236            match token {
237                Token::EOF => {
238                    return Err(ParseError::UnexpectedEOF {
239                        expected: delimiter,
240                        expected_span: open_span,
241                    });
242                }
243                Token::RParen => {
244                    return Err(ParseError::MismatchedDelimiter {
245                        expected: delimiter,
246                        expected_span: open_span,
247                        found: Block::Parenthesis,
248                        found_span: span.clone(),
249                    });
250                }
251                Token::RBrace => {
252                    return Err(ParseError::MismatchedDelimiter {
253                        expected: delimiter,
254                        expected_span: open_span,
255                        found: Block::Brace,
256                        found_span: span.clone(),
257                    });
258                }
259                Token::RBracket => {
260                    return Err(ParseError::MismatchedDelimiter {
261                        expected: delimiter,
262                        expected_span: open_span,
263                        found: Block::Bracket,
264                        found_span: span.clone(),
265                    });
266                }
267                _ => {
268                    trees.push(self.parse_tree()?);
269                }
270            }
271        }
272    }
273
274    fn current(&self) -> &(Token, Range<usize>) {
275        &self.tokens[self.pos]
276    }
277
278    fn advance(&mut self) {
279        if self.pos < self.tokens.len() - 1 {
280            self.pos += 1;
281        }
282    }
283
284    fn is_at_end(&self) -> bool {
285        matches!(self.current().0, Token::EOF)
286    }
287
288    pub fn token_trees_to_string(trees: &[TokenTree]) -> String {
289        let mut result = String::new();
290        write_token_trees_internal(&mut result, &trees.iter().collect::<Vec<&TokenTree>>(), 0);
291        result
292    }
293}
294
295impl Display for TokenTree {
296    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
297        let mut string = String::new();
298        write_token_trees_internal(&mut string, &[self], 0);
299        write!(f, "{}", string)
300    }
301}
302fn write_token_trees_internal(string: &mut String, trees: &[&TokenTree], indent: usize) {
303    for tree in trees {
304        match tree {
305            TokenTree::Token(TokenTreeToken { token, span }) => {
306                string.push_str(&format!(
307                    "{:indent$}{:?} @ {:?}\n",
308                    "",
309                    token,
310                    span,
311                    indent = indent
312                ));
313            }
314            TokenTree::Block(TokenTreeBlock {
315                delimiter,
316                open_span,
317                trees,
318                close_span,
319            }) => {
320                string.push_str(&format!(
321                    "{:indent$}{:?} {{ @ {:?}\n",
322                    "",
323                    delimiter,
324                    open_span,
325                    indent = indent
326                ));
327                let tree_refs: Vec<&TokenTree> = trees.iter().collect();
328                write_token_trees_internal(string, &tree_refs, indent + 2);
329                string.push_str(&format!(
330                    "{:indent$}}} @ {:?}\n",
331                    "",
332                    close_span,
333                    indent = indent
334                ));
335            }
336        };
337    }
338}
339
340#[cfg(test)]
341mod tests {
342    use super::*;
343
344    fn parse_token_trees(source: &str) -> Result<Vec<TokenTree>, ParseError> {
345        let mut parser = TokenTreeParser::new(lexer(source).collect());
346        parser.parse()
347    }
348
349    #[test]
350    fn test_simple_tokens() {
351        let source = "let x = 5;";
352        let trees = parse_token_trees(source).unwrap();
353        assert_eq!(trees.len(), 5); // let, x, =, 5, ;
354    }
355
356    #[test]
357    fn test_parentheses() {
358        let source = "fn add(x, y)";
359        let trees = parse_token_trees(source).unwrap();
360        assert_eq!(trees.len(), 3); // fn, add, (x, y)
361
362        match &trees[2] {
363            TokenTree::Block(TokenTreeBlock {
364                delimiter, trees, ..
365            }) => {
366                assert_eq!(*delimiter, Block::Parenthesis);
367                assert_eq!(trees.len(), 3); // x, comma, y
368            }
369            _ => panic!("Expected delimited group"),
370        }
371    }
372
373    #[test]
374    fn test_nested_delimiters() {
375        let source = "fn foo() { let x = (1 + 2); }";
376        let trees = parse_token_trees(source).unwrap();
377
378        // Should have: fn, foo, (), {}
379        assert_eq!(trees.len(), 4);
380
381        // Check the body
382        match &trees[3] {
383            TokenTree::Block(TokenTreeBlock {
384                delimiter, trees, ..
385            }) => {
386                assert_eq!(*delimiter, Block::Brace);
387                // Should contain: let, x, =, (1 + 2), ;
388                assert_eq!(trees.len(), 5);
389            }
390            _ => panic!("Expected brace group"),
391        }
392    }
393
394    #[test]
395    fn test_mismatched_delimiter() {
396        let source = "fn foo(] { }";
397        let result = parse_token_trees(source);
398        assert!(result.is_err());
399
400        match result.unwrap_err() {
401            ParseError::MismatchedDelimiter { .. } => {}
402            e => panic!("Expected MismatchedDelimiter, got {:?}", e),
403        }
404    }
405
406    #[test]
407    fn test_unclosed_delimiter() {
408        let source = "fn foo() { let x = 5;";
409        let result = parse_token_trees(source);
410        assert!(result.is_err());
411
412        match result.unwrap_err() {
413            ParseError::UnexpectedEOF { .. } => {}
414            e => panic!("Expected UnexpectedEOF, got {:?}", e),
415        }
416    }
417
418    #[test]
419    fn test_unexpected_closing() {
420        let source = "let x = 5 }";
421        let result = parse_token_trees(source);
422        assert!(result.is_err());
423
424        match result.unwrap_err() {
425            ParseError::UnexpectedClosing { .. } => {}
426            e => panic!("Expected UnexpectedClosing, got {:?}", e),
427        }
428    }
429
430    #[test]
431    fn test_complex_nesting() {
432        let source = r#"
433            fn calculate() {
434                let result = add(mul(2, 3), sub(10, 5));
435                return result;
436            }
437        "#;
438        let trees = parse_token_trees(source).unwrap();
439        println!("{}", TokenTreeParser::token_trees_to_string(&trees));
440    }
441}