motoko/lexer.rs

use crate::{
    ast::{Loc, Source, SourceKnown, Span},
    lexer_types::{GroupType, Token, TokenTree, Tokens},
};
use line_col::LineColLookup;
use logos::Logos;
use std::rc::Rc;

pub const KEYWORDS: &[&str] = &[
    "actor",
    "and",
    "async",
    "assert",
    "await",
    "break",
    "case",
    "catch",
    "class",
    "composite",
    "continue",
    "debug",
    "else",
    "false",
    "for",
    "func",
    "if",
    "in",
    "import",
    "module",
    "not",
    "null",
    "object",
    "or",
    "label",
    "let",
    "loop",
    "private",
    "public",
    "return",
    "shared",
    "try",
    "throw",
    "debug_show",
    "query",
    "switch",
    "true",
    "type",
    "var",
    "while",
    "stable",
    "flexible",
    "system",
    "ignore",
    "to_candid",
    "from_candid",
    "with",
    "finally",
];

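/// Returns `true` if `ident` is reserved as a Motoko keyword. Matching is
/// exact and case-sensitive: `is_keyword("actor")` holds, while
/// `is_keyword("Actor")` and `is_keyword("main")` do not.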
pub fn is_keyword(ident: &str) -> bool {
    KEYWORDS.contains(&ident)
}

pub type LexResult<T> = Result<T, ()>;

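/// Lexes `input` and groups the resulting tokens by matching delimiters.
/// As an illustration (shape assumed here, not asserted by the source):
/// `"f(x)"` becomes an `Unenclosed` root group holding the token `f`
/// followed by a nested parenthesized group around `x`.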
pub fn create_token_tree(input: &str) -> LexResult<TokenTree> {
    group(create_token_vec(input)?)
}

pub fn create_token_vec(input: &str) -> LexResult<Tokens> {
    let line_col = LineColLookup::new(input);
    let mut tokens = vec![];
    // Tokenize a comment-free slice of the source. `offset` is the slice's
    // byte position in the full input, so spans and line/col lookups stay
    // relative to the original source rather than to the slice.
    let tokenize_source = |tokens: &mut Tokens, offset: usize, input: &str| {
        tokens.extend(Token::lexer(input).spanned().map(|(t, span)| {
            // Convert errors to the `Unknown` token type
            let t = match t {
                Token::Error => Token::Unknown(input[span.clone()].to_string()),
                t => t,
            };
            // Shift the slice-relative span back into full-input coordinates
            let span = span.start + offset..span.end + offset;
            let (line, col) = line_col.get(span.start);
            Loc(t, Source::Known(Rc::new(SourceKnown { span, line, col })))
        }));
    };
    let comment_spans = find_comment_spans(input);
    // Tokenize everything before the first comment (or end of input)
    tokenize_source(
        &mut tokens,
        0,
        &input[..comment_spans.first().map(|s| s.start).unwrap_or(input.len())],
    );
    for (i, span) in comment_spans.iter().enumerate() {
        // Add comment token
        let comment = input[span.clone()].to_string();
        let (line, col) = line_col.get(span.start);
        tokens.push(Loc(
            if comment.starts_with("//") {
                Token::LineComment(comment)
            } else {
                Token::BlockComment(comment)
            },
            Source::Known(Rc::new(SourceKnown {
                span: span.clone(),
                line,
                col,
            })),
        ));
        // Tokenize source between this comment and the next (or end of input)
        tokenize_source(
            &mut tokens,
            span.end,
            &input[span.end
                ..comment_spans
                    .get(i + 1)
                    .map(|s| s.start)
                    .unwrap_or(input.len())],
        );
    }
    Ok(tokens)
}

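// For example, `create_token_vec("let x = 1; // note")` should yield the
// source tokens for `let x = 1;` followed by a single `Token::LineComment`
// covering `// note`, with every span expressed in full-input coordinates
// (hence the `offset` shift above).

/// Wraps the full token stream in a root `Unenclosed` group.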
pub fn group(tokens: Tokens) -> LexResult<TokenTree> {
    Ok(TokenTree::Group(
        group_(&tokens)?,
        GroupType::Unenclosed,
        None,
    ))
}

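// Recursively folds a flat token slice into a tree: each matched
// `Open`/`Close` pair becomes a nested `Group`, while an `Open` with no
// matching `Close` is kept as a plain token, so grouping never fails on
// unbalanced input.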
fn group_(tokens: &[Loc<Token>]) -> LexResult<Vec<TokenTree>> {
    let mut result = vec![];
    let mut i = 0;
    while i < tokens.len() {
        let token = &tokens[i];
        result.push(match &token.0 {
            Token::Open((_, g)) => {
                let start = i;
                if let Some(end) = find_closing(g, tokens, i) {
                    i = end;
                    TokenTree::Group(
                        group_(&tokens[start + 1..i])?,
                        g.clone(),
                        Some((token.clone(), tokens[i].clone())),
                    )
                } else {
                    // Extraneous opening token
                    TokenTree::Token(token.clone())
                }
            }
            _ => TokenTree::Token(token.clone()),
        });
        i += 1;
    }
    Ok(result)
}

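// Returns the index of the token that closes the group of type `sort` opened
// at `start`, tracking nesting depth for same-typed groups, or `None` if the
// group is never closed.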
fn find_closing(sort: &GroupType, tokens: &[Loc<Token>], start: usize) -> Option<usize> {
    let mut i = start + 1;
    let mut depth: usize = 0;
    while i < tokens.len() {
        let Loc(t, _) = &tokens[i];
        if let Token::Open((_, g)) = t {
            if g == sort {
                depth += 1;
            } else if g == &GroupType::Comment {
                // Skip tokens inside block comments so they don't affect depth
                if let Some(j) = find_closing(g, tokens, i) {
                    i = j;
                }
            }
        }
        if let Token::Close((_, g)) = t {
            if g == sort {
                if depth == 0 {
                    return Some(i);
                }
                depth -= 1;
            }
        }
        i += 1;
    }
    None
}

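/// Returns the byte ranges of every line and block comment in `input`,
/// honoring nested block comments and ignoring comment markers that occur
/// inside string or character literals.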
pub fn find_comment_spans(input: &str) -> Vec<Span> {
    let mut iter = input.char_indices().peekable();
    let mut results = vec![];
    let mut block_start: Option<usize> = None;
    let mut nest_depth = 0;
    while let Some((i, c)) = iter.next() {
        match c {
            '"' | '\'' if nest_depth == 0 => {
                // String or character literal
                let mut escaped = false;
                while let Some((_, c1)) = iter.next() {
                    if escaped {
                        // Skip escaped character
                        escaped = false;
                    } else if c1 == '\\' {
                        // Escape next character
                        escaped = true;
                    } else if c1 == c {
                        // End of literal
                        break;
                    }
                }
            }
            '/' => match iter.peek() {
                Some((_, '*')) => {
                    // Start block comment
                    iter.next().unwrap();
                    if nest_depth == 0 {
                        block_start = Some(i);
                    }
                    nest_depth += 1;
                }
                Some((_, '/')) if nest_depth == 0 => {
                    // Line comment: runs to the newline (exclusive) or end of input
                    loop {
                        match iter.next() {
                            Some((j, '\n')) => {
                                results.push(i..j);
                                break;
                            }
                            None => {
                                results.push(i..input.len());
                                break;
                            }
                            _ => (),
                        }
                    }
                }
                _ => (),
            },
            '*' if nest_depth > 0 => {
                if let Some((_, '/')) = iter.peek() {
                    // End block comment
                    nest_depth -= 1;
                    if nest_depth == 0 {
                        // Outermost comment closed; record its full span
                        let (end, _) = iter.next().unwrap();
                        results.push(block_start.unwrap()..end + 1);
                        block_start = None;
                    }
                }
            }
            _ => (),
        }
    }
    results
}
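
// A minimal sketch of unit tests for `find_comment_spans`, which depends only
// on code in this file. It assumes `Span` is `std::ops::Range<usize>` (spans
// are built from `i..j` above and used to index `input` directly). These
// tests are illustrative and not part of the original source.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn finds_line_and_block_comments() {
        let input = "let x = 1; // line\n/* block /* nested */ */ let y = 2;";
        let spans = find_comment_spans(input);
        assert_eq!(spans.len(), 2);
        assert_eq!(&input[spans[0].clone()], "// line");
        assert_eq!(&input[spans[1].clone()], "/* block /* nested */ */");
    }

    #[test]
    fn ignores_comment_markers_inside_literals() {
        let input = "let s = \"// not a comment\";";
        assert!(find_comment_spans(input).is_empty());
    }
}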