1use crate::{kind::TclSyntaxKind, language::TclLanguage};
2use oak_core::{
3 Lexer, LexerState, OakError,
4 lexer::{CommentConfig, LexOutput, LexerCache, StringConfig, WhitespaceConfig},
5 source::Source,
6};
7
8type State<'s, S> = LexerState<'s, S, TclLanguage>;
9
10static TCL_WHITESPACE: WhitespaceConfig = WhitespaceConfig { unicode_whitespace: true };
11static TCL_COMMENT: CommentConfig = CommentConfig { line_marker: "#", block_start: "", block_end: "", nested_blocks: false };
12static TCL_STRING: StringConfig = StringConfig { quotes: &['"'], escape: Some('\\') };
13
14#[derive(Clone)]
15pub struct TclLexer<'config> {
16 _config: &'config TclLanguage,
17}
18
19impl<'config> TclLexer<'config> {
20 pub fn new(config: &'config TclLanguage) -> Self {
21 Self { _config: config }
22 }
23
24 fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), OakError> {
25 while state.not_at_end() {
26 let safe_point = state.get_position();
27
28 if self.skip_whitespace(state) {
29 continue;
30 }
31
32 if self.lex_newline(state) {
33 continue;
34 }
35
36 if self.skip_comment(state) {
37 continue;
38 }
39
40 if self.lex_string_literal(state) {
41 continue;
42 }
43
44 if self.lex_brace_string(state) {
45 continue;
46 }
47
48 if self.lex_numeric_literal(state) {
49 continue;
50 }
51
52 if self.lex_identifier_or_keyword(state) {
53 continue;
54 }
55
56 if self.lex_operators(state) {
57 continue;
58 }
59
60 if self.lex_single_char_tokens(state) {
61 continue;
62 }
63
64 if let Some(ch) = state.current() {
66 state.advance(ch.len_utf8());
67 }
68
69 state.advance_if_dead_lock(safe_point);
70 }
71
72 state.add_eof();
73 Ok(())
74 }
75}
76
77impl<'config> Lexer<TclLanguage> for TclLexer<'config> {
78 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<TclLanguage>) -> LexOutput<TclLanguage> {
79 let mut state = State::new(source);
80 let result = self.run(&mut state);
81 state.finish_with_cache(result, cache)
82 }
83}
84
85impl<'config> TclLexer<'config> {
86 fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
87 TCL_WHITESPACE.scan(state, TclSyntaxKind::Whitespace)
88 }
89
90 fn lex_newline<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
91 if let Some(ch) = state.current() {
92 if ch == '\n' {
93 let start = state.get_position();
94 state.advance(1);
95 state.add_token(TclSyntaxKind::Newline, start, state.get_position());
96 return true;
97 }
98 else if ch == '\r' {
99 let start = state.get_position();
100 state.advance(1);
101 if state.current() == Some('\n') {
102 state.advance(1);
103 }
104 state.add_token(TclSyntaxKind::Newline, start, state.get_position());
105 return true;
106 }
107 }
108 false
109 }
110
111 fn skip_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
112 TCL_COMMENT.scan(state, TclSyntaxKind::Comment, TclSyntaxKind::Comment)
113 }
114
115 fn lex_string_literal<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
116 TCL_STRING.scan(state, TclSyntaxKind::StringLiteral)
117 }
118
119 fn lex_brace_string<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
120 let start = state.get_position();
121
122 if state.current() != Some('{') {
123 return false;
124 }
125
126 state.advance(1);
127 let mut brace_count = 1;
128
129 while let Some(ch) = state.peek() {
130 if ch == '{' {
131 brace_count += 1;
132 }
133 else if ch == '}' {
134 brace_count -= 1;
135 if brace_count == 0 {
136 state.advance(1);
137 break;
138 }
139 }
140 state.advance(ch.len_utf8());
141 }
142
143 state.add_token(TclSyntaxKind::StringLiteral, start, state.get_position());
144 true
145 }
146
147 fn lex_numeric_literal<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
148 let start = state.get_position();
149 let first = match state.current() {
150 Some(c) => c,
151 None => return false,
152 };
153
154 if !first.is_ascii_digit() && !(first == '-' && state.peek().map_or(false, |c| c.is_ascii_digit())) {
155 return false;
156 }
157
158 if first == '-' {
159 state.advance(1);
160 }
161
162 while let Some(c) = state.current() {
164 if c.is_ascii_digit() {
165 state.advance(1);
166 }
167 else {
168 break;
169 }
170 }
171
172 if state.current() == Some('.') && state.peek().map_or(false, |c| c.is_ascii_digit()) {
174 state.advance(1); while let Some(c) = state.current() {
176 if c.is_ascii_digit() {
177 state.advance(1);
178 }
179 else {
180 break;
181 }
182 }
183 }
184
185 if let Some(c) = state.current() {
187 if c == 'e' || c == 'E' {
188 let next = state.peek();
189 if next == Some('+') || next == Some('-') || next.map_or(false, |d| d.is_ascii_digit()) {
190 state.advance(1);
191 if let Some(sign) = state.current() {
192 if sign == '+' || sign == '-' {
193 state.advance(1);
194 }
195 }
196 while let Some(d) = state.current() {
197 if d.is_ascii_digit() {
198 state.advance(1);
199 }
200 else {
201 break;
202 }
203 }
204 }
205 }
206 }
207
208 state.add_token(TclSyntaxKind::Number, start, state.get_position());
209 true
210 }
211
212 fn lex_identifier_or_keyword<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
213 let start = state.get_position();
214 let ch = match state.current() {
215 Some(c) => c,
216 None => return false,
217 };
218
219 if !(ch.is_ascii_alphabetic() || ch == '_') {
220 return false;
221 }
222
223 state.advance(ch.len_utf8());
224 while let Some(c) = state.current() {
225 if c.is_ascii_alphanumeric() || c == '_' {
226 state.advance(c.len_utf8());
227 }
228 else {
229 break;
230 }
231 }
232
233 let end = state.get_position();
234 let text = state.source().get_text_in(oak_core::Range { start, end });
235 let kind = match text.as_ref() {
236 "if" => TclSyntaxKind::If,
237 "else" => TclSyntaxKind::Else,
238 "elseif" => TclSyntaxKind::ElseIf,
239 "for" => TclSyntaxKind::For,
240 "while" => TclSyntaxKind::While,
241 "foreach" => TclSyntaxKind::ForEach,
242 "proc" => TclSyntaxKind::Proc,
243 "return" => TclSyntaxKind::Return,
244 "break" => TclSyntaxKind::Break,
245 "continue" => TclSyntaxKind::Continue,
246 "set" => TclSyntaxKind::Set,
247 "unset" => TclSyntaxKind::Unset,
248 "global" => TclSyntaxKind::Global,
249 "upvar" => TclSyntaxKind::Upvar,
250 "variable" => TclSyntaxKind::Variable,
251 _ => TclSyntaxKind::Identifier,
252 };
253
254 state.add_token(kind, start, state.get_position());
255 true
256 }
257
258 fn lex_operators<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
259 let start = state.get_position();
260
261 let patterns: &[(&str, TclSyntaxKind)] =
263 &[("==", TclSyntaxKind::Equal), ("!=", TclSyntaxKind::NotEqual), ("<=", TclSyntaxKind::LessEqual), (">=", TclSyntaxKind::GreaterEqual), ("&&", TclSyntaxKind::AmpersandAmpersand), ("||", TclSyntaxKind::PipePipe)];
264
265 for (pat, kind) in patterns {
266 let mut matches = true;
267 for (i, c) in pat.chars().enumerate() {
268 if state.peek_next_n(i) != Some(c) {
269 matches = false;
270 break;
271 }
272 }
273
274 if matches {
275 state.advance(pat.len());
276 state.add_token(*kind, start, state.get_position());
277 return true;
278 }
279 }
280
281 if let Some(ch) = state.current() {
283 let kind = match ch {
284 '+' => Some(TclSyntaxKind::Plus),
285 '-' => Some(TclSyntaxKind::Minus),
286 '*' => Some(TclSyntaxKind::Star),
287 '/' => Some(TclSyntaxKind::Slash),
288 '%' => Some(TclSyntaxKind::Percent),
289 '<' => Some(TclSyntaxKind::Less),
290 '>' => Some(TclSyntaxKind::Greater),
291 '!' => Some(TclSyntaxKind::Exclamation),
292 '&' => Some(TclSyntaxKind::Ampersand),
293 '|' => Some(TclSyntaxKind::Pipe),
294 '=' => Some(TclSyntaxKind::Equal),
295 _ => None,
296 };
297
298 if let Some(k) = kind {
299 state.advance(ch.len_utf8());
300 state.add_token(k, start, state.get_position());
301 return true;
302 }
303 }
304 false
305 }
306
307 fn lex_single_char_tokens<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
308 let start = state.get_position();
309
310 if let Some(ch) = state.current() {
311 let kind = match ch {
312 '(' => TclSyntaxKind::LeftParen,
313 ')' => TclSyntaxKind::RightParen,
314 '[' => TclSyntaxKind::LeftBracket,
315 ']' => TclSyntaxKind::RightBracket,
316 '{' => TclSyntaxKind::LeftBrace,
317 '}' => TclSyntaxKind::RightBrace,
318 ';' => TclSyntaxKind::Semicolon,
319 ',' => TclSyntaxKind::Comma,
320 '$' => TclSyntaxKind::Dollar,
321 _ => return false,
322 };
323
324 state.advance(ch.len_utf8());
325 state.add_token(kind, start, state.get_position());
326 true
327 }
328 else {
329 false
330 }
331 }
332}