oak_handlebars/lexer/
mod.rs

1use crate::{kind::HandlebarsSyntaxKind, language::HandlebarsLanguage};
2use oak_core::{
3    IncrementalCache, Lexer, LexerState, OakError, Token,
4    lexer::{LexOutput, StringConfig, WhitespaceConfig},
5    source::Source,
6};
7use std::sync::LazyLock;
8
9type State<S> = LexerState<S, HandlebarsLanguage>;
10
11// Scanner configurations
12static HB_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
13static HB_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"', '\''], escape: Some('\\') });
14
15#[derive(Clone)]
16pub struct HandlebarsLexer<'config> {
17    config: &'config HandlebarsLanguage,
18}
19
20impl<'config> Lexer<HandlebarsLanguage> for HandlebarsLexer<'config> {
21    fn lex_incremental(
22        &self,
23        source: impl Source,
24        changed: usize,
25        cache: IncrementalCache<HandlebarsLanguage>,
26    ) -> LexOutput<HandlebarsLanguage> {
27        let mut state = LexerState::new_with_cache(source, changed, cache);
28        let result = self.run(&mut state);
29        state.finish(result)
30    }
31}
32
33impl<'config> HandlebarsLexer<'config> {
34    pub fn new(config: &'config HandlebarsLanguage) -> Self {
35        Self { config }
36    }
37
38    fn run<S: Source>(&self, state: &mut State<S>) -> Result<(), OakError> {
39        while state.not_at_end() {
40            let safe_point = state.get_position();
41
42            if self.skip_whitespace(state) {
43                continue;
44            }
45
46            if self.skip_newline(state) {
47                continue;
48            }
49
50            if self.lex_comment(state) {
51                continue;
52            }
53
54            if self.lex_handlebars_expression(state) {
55                continue;
56            }
57
58            if self.lex_string_literal(state) {
59                continue;
60            }
61
62            if self.lex_number_literal(state) {
63                continue;
64            }
65
66            if self.lex_identifier(state) {
67                continue;
68            }
69
70            if self.lex_single_char_tokens(state) {
71                continue;
72            }
73
74            if self.lex_content(state) {
75                continue;
76            }
77
78            state.safe_check(safe_point);
79        }
80
81        // Add EOF token
82        let eof_pos = state.get_position();
83        state.add_token(HandlebarsSyntaxKind::Eof, eof_pos, eof_pos);
84        Ok(())
85    }
86
87    fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
88        match HB_WHITESPACE.scan(state.rest(), state.get_position(), HandlebarsSyntaxKind::Whitespace) {
89            Some(token) => {
90                state.advance_with(token);
91                true
92            }
93            None => false,
94        }
95    }
96
97    fn skip_newline<S: Source>(&self, state: &mut State<S>) -> bool {
98        if state.current() == Some('\n') || state.current() == Some('\r') {
99            let start = state.get_position();
100            state.advance(1);
101            if state.current() == Some('\n') && state.peek() == Some('\r') {
102                state.advance(1);
103            }
104            let end = state.get_position();
105            state.add_token(HandlebarsSyntaxKind::Newline, start, end);
106            true
107        }
108        else {
109            false
110        }
111    }
112
113    fn lex_comment<S: Source>(&self, state: &mut State<S>) -> bool {
114        let rest = state.rest();
115        if rest.starts_with("{{!--") {
116            let start = state.get_position();
117            state.advance(5); // Skip "{{!--"
118
119            // Find the end of the comment
120            while state.not_at_end() {
121                if state.rest().starts_with("--}}") {
122                    state.advance(4); // Skip "--}}"
123                    break;
124                }
125                state.advance(1);
126            }
127
128            let end = state.get_position();
129            state.add_token(HandlebarsSyntaxKind::Comment, start, end);
130            true
131        }
132        else if rest.starts_with("{{!") {
133            let start = state.get_position();
134            state.advance(3); // Skip "{{!"
135
136            // Find the end of the comment
137            while state.not_at_end() {
138                if state.rest().starts_with("}}") {
139                    state.advance(2); // Skip "}}"
140                    break;
141                }
142                state.advance(1);
143            }
144
145            let end = state.get_position();
146            state.add_token(HandlebarsSyntaxKind::Comment, start, end);
147            true
148        }
149        else {
150            false
151        }
152    }
153
154    fn lex_handlebars_expression<S: Source>(&self, state: &mut State<S>) -> bool {
155        let rest = state.rest();
156        let start = state.get_position();
157
158        if rest.starts_with("{{{") {
159            state.advance(3);
160            let end = state.get_position();
161            state.add_token(HandlebarsSyntaxKind::OpenUnescaped, start, end);
162            true
163        }
164        else if rest.starts_with("{{#") {
165            state.advance(3);
166            let end = state.get_position();
167            state.add_token(HandlebarsSyntaxKind::OpenBlock, start, end);
168            true
169        }
170        else if rest.starts_with("{{/") {
171            state.advance(3);
172            let end = state.get_position();
173            state.add_token(HandlebarsSyntaxKind::CloseBlock, start, end);
174            true
175        }
176        else if rest.starts_with("{{>") {
177            state.advance(3);
178            let end = state.get_position();
179            state.add_token(HandlebarsSyntaxKind::OpenPartial, start, end);
180            true
181        }
182        else if rest.starts_with("{{") {
183            state.advance(2);
184            let end = state.get_position();
185            state.add_token(HandlebarsSyntaxKind::Open, start, end);
186            true
187        }
188        else if rest.starts_with("}}}") {
189            state.advance(3);
190            let end = state.get_position();
191            state.add_token(HandlebarsSyntaxKind::CloseUnescaped, start, end);
192            true
193        }
194        else if rest.starts_with("}}") {
195            state.advance(2);
196            let end = state.get_position();
197            state.add_token(HandlebarsSyntaxKind::Close, start, end);
198            true
199        }
200        else {
201            false
202        }
203    }
204
205    fn lex_string_literal<S: Source>(&self, state: &mut State<S>) -> bool {
206        match HB_STRING.scan(state.rest(), 0, HandlebarsSyntaxKind::StringLiteral) {
207            Some(token) => {
208                // 创建新的 token 并调整位置为绝对位置
209                use std::range::Range;
210                let adjusted_token = Token {
211                    kind: token.kind,
212                    span: Range { start: token.span.start + state.get_position(), end: token.span.end + state.get_position() },
213                };
214                state.advance_with(adjusted_token);
215                true
216            }
217            None => false,
218        }
219    }
220
221    fn lex_number_literal<S: Source>(&self, state: &mut State<S>) -> bool {
222        if let Some(c) = state.current() {
223            if c.is_ascii_digit() {
224                let start = state.get_position();
225                while let Some(c) = state.current() {
226                    if c.is_ascii_digit() || c == '.' {
227                        state.advance(1);
228                    }
229                    else {
230                        break;
231                    }
232                }
233                let end = state.get_position();
234                state.add_token(HandlebarsSyntaxKind::NumberLiteral, start, end);
235                true
236            }
237            else {
238                false
239            }
240        }
241        else {
242            false
243        }
244    }
245
246    fn lex_identifier<S: Source>(&self, state: &mut State<S>) -> bool {
247        if let Some(c) = state.current() {
248            if c.is_alphabetic() || c == '_' {
249                let start = state.get_position();
250                while let Some(c) = state.current() {
251                    if c.is_alphanumeric() || c == '_' || c == '-' || c == '.' {
252                        state.advance(1);
253                    }
254                    else {
255                        break;
256                    }
257                }
258                let end = state.get_position();
259                state.add_token(HandlebarsSyntaxKind::Identifier, start, end);
260                true
261            }
262            else {
263                false
264            }
265        }
266        else {
267            false
268        }
269    }
270
271    fn lex_single_char_tokens<S: Source>(&self, state: &mut State<S>) -> bool {
272        if let Some(c) = state.current() {
273            let start = state.get_position();
274            let kind = match c {
275                '(' => HandlebarsSyntaxKind::LeftParen,
276                ')' => HandlebarsSyntaxKind::RightParen,
277                '[' => HandlebarsSyntaxKind::LeftBracket,
278                ']' => HandlebarsSyntaxKind::RightBracket,
279                '=' => HandlebarsSyntaxKind::Equal,
280                '|' => HandlebarsSyntaxKind::Pipe,
281                '#' => HandlebarsSyntaxKind::Hash,
282                '.' => HandlebarsSyntaxKind::Dot,
283                '/' => HandlebarsSyntaxKind::Slash,
284                _ => return false,
285            };
286            state.advance(1);
287            let end = state.get_position();
288            state.add_token(kind, start, end);
289            true
290        }
291        else {
292            false
293        }
294    }
295
296    fn lex_content<S: Source>(&self, state: &mut State<S>) -> bool {
297        let start = state.get_position();
298        let mut has_content = false;
299
300        while state.not_at_end() {
301            let rest = state.rest();
302            // Stop if we encounter Handlebars kind
303            if rest.starts_with("{{") {
304                break;
305            }
306            state.advance(1);
307            has_content = true;
308        }
309
310        if has_content {
311            let end = state.get_position();
312            state.add_token(HandlebarsSyntaxKind::Content, start, end);
313            true
314        }
315        else {
316            false
317        }
318    }
319}