// oak_handlebars/lexer/mod.rs

1use crate::{kind::HandlebarsSyntaxKind, language::HandlebarsLanguage};
2use oak_core::{
3    Lexer, LexerCache, LexerState, OakError, Range,
4    lexer::{LexOutput, StringConfig, WhitespaceConfig},
5    source::Source,
6};
7use std::sync::LazyLock;
8
9type State<'a, S> = LexerState<'a, S, HandlebarsLanguage>;
10
11// Scanner configurations
12static HB_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
13static HB_STRING_DOUBLE: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
14static HB_STRING_SINGLE: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['\''], escape: Some('\\') });
15
16#[derive(Clone)]
17pub struct HandlebarsLexer<'config> {
18    _config: &'config HandlebarsLanguage,
19}
20
21impl<'config> Lexer<HandlebarsLanguage> for HandlebarsLexer<'config> {
22    fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<HandlebarsLanguage>) -> LexOutput<HandlebarsLanguage> {
23        let mut state: State<'_, S> = LexerState::new(source);
24        let result = self.run(&mut state);
25        if result.is_ok() {
26            state.add_eof();
27        }
28        state.finish_with_cache(result, cache)
29    }
30}
31
32impl<'config> HandlebarsLexer<'config> {
33    pub fn new(config: &'config HandlebarsLanguage) -> Self {
34        Self { _config: config }
35    }
36
37    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
38        while state.not_at_end() {
39            let safe_point = state.get_position();
40
41            if self.skip_whitespace(state) {
42                continue;
43            }
44
45            if self.skip_newline(state) {
46                continue;
47            }
48
49            if self.lex_comment(state) {
50                continue;
51            }
52
53            if self.lex_handlebars_expression(state) {
54                continue;
55            }
56
57            if self.lex_string_literal(state) {
58                continue;
59            }
60
61            if self.lex_number_literal(state) {
62                continue;
63            }
64
65            if self.lex_identifier(state) {
66                continue;
67            }
68
69            if self.lex_single_char_tokens(state) {
70                continue;
71            }
72
73            if self.lex_content(state) {
74                continue;
75            }
76
77            state.advance_if_dead_lock(safe_point);
78        }
79
80        Ok(())
81    }
82
83    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
84        HB_WHITESPACE.scan(state, HandlebarsSyntaxKind::Whitespace)
85    }
86
87    fn skip_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
88        if state.current() == Some('\n') || state.current() == Some('\r') {
89            let start = state.get_position();
90            state.advance(1);
91            if state.current() == Some('\n') && state.peek() == Some('\r') {
92                state.advance(1);
93            }
94            let end = state.get_position();
95            state.add_token(HandlebarsSyntaxKind::Newline, start, end);
96            true
97        }
98        else {
99            false
100        }
101    }
102
103    fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
104        let start = state.get_position();
105        if state.consume_if_starts_with("{{!--") {
106            // Find the end of the comment
107            while state.not_at_end() {
108                if state.starts_with("--}}") {
109                    state.advance(4); // Skip "--}}"
110                    break;
111                }
112                state.advance(1);
113            }
114
115            let end = state.get_position();
116            state.add_token(HandlebarsSyntaxKind::Comment, start, end);
117            true
118        }
119        else if state.consume_if_starts_with("{{!") {
120            // Find the end of the comment
121            while state.not_at_end() {
122                if state.starts_with("}}") {
123                    state.advance(2); // Skip "}}"
124                    break;
125                }
126                state.advance(1);
127            }
128
129            let end = state.get_position();
130            state.add_token(HandlebarsSyntaxKind::Comment, start, end);
131            true
132        }
133        else {
134            false
135        }
136    }
137
138    fn lex_handlebars_expression<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
139        let start = state.get_position();
140
141        if state.consume_if_starts_with("{{{{/") {
142            let end = state.get_position();
143            state.add_token(HandlebarsSyntaxKind::OpenEndRawBlock, start, end);
144            true
145        }
146        else if state.consume_if_starts_with("{{{{") {
147            let end = state.get_position();
148            state.add_token(HandlebarsSyntaxKind::OpenRawBlock, start, end);
149            true
150        }
151        else if state.consume_if_starts_with("}}}}") {
152            let end = state.get_position();
153            state.add_token(HandlebarsSyntaxKind::CloseRawBlock, start, end);
154            true
155        }
156        else if state.consume_if_starts_with("{{{") {
157            let end = state.get_position();
158            state.add_token(HandlebarsSyntaxKind::OpenUnescaped, start, end);
159            true
160        }
161        else if state.consume_if_starts_with("{{#") {
162            let end = state.get_position();
163            state.add_token(HandlebarsSyntaxKind::OpenBlock, start, end);
164            true
165        }
166        else if state.consume_if_starts_with("{{^") {
167            let end = state.get_position();
168            state.add_token(HandlebarsSyntaxKind::OpenInverseBlock, start, end);
169            true
170        }
171        else if state.consume_if_starts_with("{{/") {
172            let end = state.get_position();
173            state.add_token(HandlebarsSyntaxKind::CloseBlock, start, end);
174            true
175        }
176        else if state.consume_if_starts_with("{{>") {
177            let end = state.get_position();
178            state.add_token(HandlebarsSyntaxKind::OpenPartial, start, end);
179            true
180        }
181        else if state.consume_if_starts_with("{{") {
182            let end = state.get_position();
183            state.add_token(HandlebarsSyntaxKind::Open, start, end);
184            true
185        }
186        else if state.consume_if_starts_with("}}}") {
187            let end = state.get_position();
188            state.add_token(HandlebarsSyntaxKind::CloseUnescaped, start, end);
189            true
190        }
191        else if state.consume_if_starts_with("}}") {
192            let end = state.get_position();
193            state.add_token(HandlebarsSyntaxKind::Close, start, end);
194            true
195        }
196        else {
197            false
198        }
199    }
200
201    fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
202        let config = if state.current() == Some('"') {
203            &*HB_STRING_DOUBLE
204        }
205        else if state.current() == Some('\'') {
206            &*HB_STRING_SINGLE
207        }
208        else {
209            return false;
210        };
211
212        config.scan(state, HandlebarsSyntaxKind::StringLiteral)
213    }
214
215    fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
216        if let Some(c) = state.current() {
217            if c.is_ascii_digit() {
218                let start = state.get_position();
219                while let Some(c) = state.current() {
220                    if c.is_ascii_digit() || c == '.' {
221                        state.advance(1);
222                    }
223                    else {
224                        break;
225                    }
226                }
227                let end = state.get_position();
228                state.add_token(HandlebarsSyntaxKind::NumberLiteral, start, end);
229                true
230            }
231            else {
232                false
233            }
234        }
235        else {
236            false
237        }
238    }
239
240    fn lex_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
241        if let Some(c) = state.current() {
242            if c.is_alphabetic() || c == '_' || c == '@' {
243                let start = state.get_position();
244                while let Some(c) = state.current() {
245                    if c.is_alphanumeric() || c == '_' || c == '-' || c == '.' {
246                        state.advance(1);
247                    }
248                    else {
249                        break;
250                    }
251                }
252                let end = state.get_position();
253                let text = state.get_text_in(Range { start, end });
254                let kind = match text.as_ref() {
255                    "else" => HandlebarsSyntaxKind::Else,
256                    "true" | "false" => HandlebarsSyntaxKind::BooleanLiteral,
257                    _ => HandlebarsSyntaxKind::Identifier,
258                };
259                state.add_token(kind, start, end);
260                true
261            }
262            else {
263                false
264            }
265        }
266        else {
267            false
268        }
269    }
270
271    fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
272        if let Some(c) = state.current() {
273            let start = state.get_position();
274            let kind = match c {
275                '(' => HandlebarsSyntaxKind::LeftParen,
276                ')' => HandlebarsSyntaxKind::RightParen,
277                '[' => HandlebarsSyntaxKind::LeftBracket,
278                ']' => HandlebarsSyntaxKind::RightBracket,
279                '=' => HandlebarsSyntaxKind::Equal,
280                '|' => HandlebarsSyntaxKind::Pipe,
281                '#' => HandlebarsSyntaxKind::Hash,
282                '.' => HandlebarsSyntaxKind::Dot,
283                '/' => HandlebarsSyntaxKind::Slash,
284                '@' => HandlebarsSyntaxKind::At,
285                '^' => HandlebarsSyntaxKind::Caret,
286                _ => return false,
287            };
288            state.advance(1);
289            let end = state.get_position();
290            state.add_token(kind, start, end);
291            true
292        }
293        else {
294            false
295        }
296    }
297
298    fn lex_content<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
299        let start = state.get_position();
300        let mut count = 0;
301
302        while let Some(c) = state.current() {
303            if c == '{' && state.peek() == Some('{') {
304                break;
305            }
306            state.advance(1);
307            count += 1;
308        }
309
310        if count > 0 {
311            let end = state.get_position();
312            state.add_token(HandlebarsSyntaxKind::Content, start, end);
313            true
314        }
315        else {
316            false
317        }
318    }
319}