use crate::{
    ast::{Loc, Source, SourceKnown, Span},
    lexer_types::{GroupType, Token, TokenTree, Tokens},
};
use line_col::LineColLookup;
use logos::Logos;
use std::rc::Rc;

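/// Reserved keywords recognized by the lexer.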
pub const KEYWORDS: &[&str] = &[
    "actor",
    "and",
    "async",
    "assert",
    "await",
    "break",
    "case",
    "catch",
    "class",
    "composite",
    "continue",
    "debug",
    "else",
    "false",
    "for",
    "func",
    "if",
    "in",
    "import",
    "module",
    "not",
    "null",
    "object",
    "or",
    "label",
    "let",
    "loop",
    "private",
    "public",
    "return",
    "shared",
    "try",
    "throw",
    "debug_show",
    "query",
    "switch",
    "true",
    "type",
    "var",
    "while",
    "stable",
    "flexible",
    "system",
    "ignore",
    "to_candid",
    "from_candid",
    "with",
    "finally",
];

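/// Returns `true` if `ident` is a reserved keyword.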
pub fn is_keyword(ident: &str) -> bool {
    KEYWORDS.contains(&ident)
}

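/// Result of lexing; errors carry no additional detail.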
pub type LexResult<T> = Result<T, ()>;

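/// Tokenizes `input` and groups the result into a delimiter-matched
/// [`TokenTree`].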
pub fn create_token_tree(input: &str) -> LexResult<TokenTree> {
    group(create_token_vec(input)?)
}

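/// Tokenizes `input` into a flat token vector. Comment spans are located
/// first with `find_comment_spans`; each comment becomes a single
/// `LineComment` or `BlockComment` token, and the comment-free slices in
/// between are lexed with `logos`.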
pub fn create_token_vec(input: &str) -> LexResult<Tokens> {
    let line_col = LineColLookup::new(input);
    let mut tokens = vec![];
    // Lex one comment-free slice of the input. `offset` is the slice's start
    // position in the full source; token spans are re-based onto the full
    // input so that line/column lookups stay correct for slices that begin
    // after a comment.
    let tokenize_source = |tokens: &mut Tokens, offset: usize, source: &str| {
        tokens.extend(Token::lexer(source).spanned().map(|(t, span)| {
            let t = match t {
                Token::Error => Token::Unknown(source[span.clone()].to_string()),
                t => t,
            };
            let span = span.start + offset..span.end + offset;
            let (line, col) = line_col.get(span.start);
            Loc(t, Source::Known(Rc::new(SourceKnown { span, line, col })))
        }));
    };
    // Comments are located up front and spliced in as single tokens; the
    // slices in between are lexed normally.
    let comment_spans = find_comment_spans(input);
    tokenize_source(
        &mut tokens,
        0,
        &input[..comment_spans.first().map(|s| s.start).unwrap_or(input.len())],
    );
    for (i, span) in comment_spans.iter().enumerate() {
        let comment = input[span.clone()].to_string();
        let (line, col) = line_col.get(span.start);
        tokens.push(Loc(
            if comment.starts_with("//") {
                Token::LineComment(comment)
            } else {
                Token::BlockComment(comment)
            },
            Source::Known(Rc::new(SourceKnown {
                span: span.clone(),
                line,
                col,
            })),
        ));
        tokenize_source(
            &mut tokens,
            span.end,
            &input[span.end
                ..comment_spans
                    .get(i + 1)
                    .map(|s| s.start)
                    .unwrap_or(input.len())],
        );
    }
    Ok(tokens)
}

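/// Groups a flat token vector into a tree, wrapped in a top-level
/// `Unenclosed` group.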
pub fn group(tokens: Tokens) -> LexResult<TokenTree> {
    Ok(TokenTree::Group(
        group_(&tokens)?,
        GroupType::Unenclosed,
        None,
    ))
}

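/// Recursively pairs each `Open` delimiter with its matching `Close`
/// delimiter, turning the tokens in between into a nested `Group`.
/// Unmatched open delimiters are kept as plain tokens.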
fn group_(tokens: &[Loc<Token>]) -> LexResult<Vec<TokenTree>> {
    let mut result = vec![];
    let mut i = 0;
    while i < tokens.len() {
        let token = &tokens[i];
        result.push(match &token.0 {
            Token::Open((_, g)) => {
                let start = i;
                if let Some(end) = find_closing(g, tokens, i) {
                    i = end;
                    TokenTree::Group(
                        group_(&tokens[start + 1..i])?,
                        g.clone(),
                        Some((token.clone(), tokens[i].clone())),
                    )
                } else {
                    // No matching close delimiter; keep the token as-is.
                    TokenTree::Token(token.clone())
                }
            }
            _ => TokenTree::Token(token.clone()),
        });
        i += 1;
    }
    Ok(result)
}

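/// Returns the index of the `Close` token matching an `Open` token of
/// group type `sort` at index `start`, tracking nesting depth; `None` if
/// no matching close delimiter exists. Comment groups are skipped so that
/// delimiters inside them are ignored.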
fn find_closing(sort: &GroupType, tokens: &[Loc<Token>], start: usize) -> Option<usize> {
    let mut i = start + 1;
    let mut depth: usize = 0;
    while i < tokens.len() {
        match &tokens[i].0 {
            Token::Open((_, g)) => {
                if g == sort {
                    depth += 1;
                } else if g == &GroupType::Comment {
                    // Jump past the whole comment group so its contents
                    // can't affect the search.
                    if let Some(j) = find_closing(g, tokens, i) {
                        i = j;
                    }
                }
            }
            Token::Close((_, g)) if g == sort => {
                if depth == 0 {
                    return Some(i);
                }
                depth -= 1;
            }
            _ => {}
        }
        i += 1;
    }
    None
}

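/// Scans raw input for comment spans: `//` line comments (up to, but not
/// including, the newline) and `/* ... */` block comments, which may nest.
/// Quoted string and character literals are skipped so comment markers
/// inside them are ignored.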
pub fn find_comment_spans(input: &str) -> Vec<Span> {
    let mut iter = input.char_indices().peekable();
    let mut results = vec![];
    let mut block_start: Option<usize> = None;
    let mut nest_depth = 0;
    while let Some((i, c)) = iter.next() {
        match c {
            // Skip string and char literals so that "//" or "/*" inside
            // them is not mistaken for a comment.
            '"' | '\'' if nest_depth == 0 => {
                let mut escaped = false;
                while let Some((_, c1)) = iter.next() {
                    if escaped {
                        escaped = false;
                    } else if c1 == '\\' {
                        escaped = true;
                    } else if c1 == c {
                        break;
                    }
                }
            }
            '/' => match iter.peek() {
                Some((_, '*')) => {
                    iter.next().unwrap();
                    if nest_depth == 0 {
                        block_start = Some(i);
                    }
                    nest_depth += 1;
                }
                Some((_, '/')) if nest_depth == 0 => {
                    // Line comment: runs to the end of the line (or input).
                    loop {
                        match iter.next() {
                            Some((j, '\n')) => {
                                results.push(i..j);
                                break;
                            }
                            None => {
                                results.push(i..input.len());
                                break;
                            }
                            _ => (),
                        }
                    }
                }
                _ => (),
            },
            '*' if nest_depth > 0 => {
                if let Some((_, '/')) = iter.peek() {
                    // Consume the '/' at every depth so that "*/" can never
                    // be re-read as the start of a new "/*" opener.
                    let (end, _) = iter.next().unwrap();
                    nest_depth -= 1;
                    if nest_depth == 0 {
                        results.push(block_start.unwrap()..end + 1);
                        block_start = None;
                    }
                }
            }
            _ => (),
        }
    }
    results
}
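
// A small smoke test added for illustration (not from the original source);
// it assumes `Span` is `std::ops::Range<usize>`, as the `i..j` pushes above
// imply.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn keywords() {
        assert!(is_keyword("actor"));
        assert!(!is_keyword("ident"));
    }

    #[test]
    fn comment_spans() {
        // "// hi" spans bytes 11..16 (newline excluded); the nested block
        // comment spans bytes 17..34.
        let input = "let x = 1; // hi\n/* a /* b */ c */";
        assert_eq!(find_comment_spans(input), vec![11..16, 17..34]);
    }
}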