Skip to main content

oak_handlebars/lexer/
mod.rs

1#![doc = include_str!("readme.md")]
2use crate::language::HandlebarsLanguage;
3pub mod token_type;
4pub use token_type::HandlebarsTokenType;
5
6use oak_core::{
7    Lexer, LexerCache, LexerState, OakError, Range,
8    lexer::{LexOutput, StringConfig, WhitespaceConfig},
9    source::Source,
10};
11use std::sync::LazyLock;
12
/// Lexer state specialised to [`HandlebarsLanguage`], generic over the source type `S`.
pub(crate) type State<'a, S> = LexerState<'a, S, HandlebarsLanguage>;

// Scanner configurations, each built lazily on first use via `LazyLock`.
/// Whitespace scanner configuration with `unicode_whitespace` enabled.
static HB_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
/// String scanner configuration for `"`-quoted literals, using `\` as the escape character.
static HB_STRING_DOUBLE: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
/// String scanner configuration for `'`-quoted literals, using `\` as the escape character.
static HB_STRING_SINGLE: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['\''], escape: Some('\\') });
19
/// Handlebars lexer.
///
/// Borrows a [`HandlebarsLanguage`] configuration; the lexer reads its
/// `variable_start`, `variable_end`, `unescaped_start` and `unescaped_end`
/// delimiter strings to recognise tags and comments.
#[derive(Clone)]
pub struct HandlebarsLexer<'config> {
    /// Language configuration supplying the delimiter strings.
    config: &'config HandlebarsLanguage,
}
25
26impl<'config> Lexer<HandlebarsLanguage> for HandlebarsLexer<'config> {
27    fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<HandlebarsLanguage>) -> LexOutput<HandlebarsLanguage> {
28        let mut state: State<'_, S> = LexerState::new(source);
29        let result = self.run(&mut state);
30        if result.is_ok() {
31            state.add_eof()
32        }
33        state.finish_with_cache(result, cache)
34    }
35}
36
37impl<'config> HandlebarsLexer<'config> {
    /// Creates a new `HandlebarsLexer` that borrows `config` for the lifetime `'config`.
    pub fn new(config: &'config HandlebarsLanguage) -> Self {
        Self { config }
    }
42
43    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
44        while state.not_at_end() {
45            let safe_point = state.get_position();
46
47            if self.skip_whitespace(state) {
48                continue;
49            }
50
51            if self.skip_newline(state) {
52                continue;
53            }
54
55            if self.lex_comment(state) {
56                continue;
57            }
58
59            if self.lex_handlebars_expression(state) {
60                continue;
61            }
62
63            if self.lex_string_literal(state) {
64                continue;
65            }
66
67            if self.lex_number_literal(state) {
68                continue;
69            }
70
71            if self.lex_identifier(state) {
72                continue;
73            }
74
75            if self.lex_single_char_tokens(state) {
76                continue;
77            }
78
79            if self.lex_content(state) {
80                continue;
81            }
82
83            state.advance_if_dead_lock(safe_point)
84        }
85
86        Ok(())
87    }
88
    /// Delegates to the shared `HB_WHITESPACE` scanner, tagging what it scans as
    /// `HandlebarsTokenType::Whitespace`, and returns the scanner's boolean result unchanged.
    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        HB_WHITESPACE.scan(state, HandlebarsTokenType::Whitespace)
    }
92
93    fn skip_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
94        if state.current() == Some('\n') || state.current() == Some('\r') {
95            let start = state.get_position();
96            state.advance(1);
97            if state.current() == Some('\n') && state.peek() == Some('\r') {
98                state.advance(1)
99            }
100            let end = state.get_position();
101            state.add_token(HandlebarsTokenType::Newline, start, end);
102            true
103        }
104        else {
105            false
106        }
107    }
108
109    fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
110        let rest = state.rest();
111        if rest.starts_with(&self.config.variable_start) {
112            let comment_rest = &rest[self.config.variable_start.len()..];
113            if comment_rest.starts_with("!--") {
114                let start = state.get_position();
115                state.advance(self.config.variable_start.len() + 3);
116                while state.not_at_end() {
117                    let current_rest = state.rest();
118                    if current_rest.starts_with("--") && current_rest[2..].starts_with(&self.config.variable_end) {
119                        state.advance(2 + self.config.variable_end.len());
120                        let end = state.get_position();
121                        state.add_token(HandlebarsTokenType::Comment, start, end);
122                        return true;
123                    }
124                    state.advance(1);
125                }
126                return true;
127            }
128            else if comment_rest.starts_with('!') {
129                let start = state.get_position();
130                state.advance(self.config.variable_start.len() + 1);
131                while state.not_at_end() {
132                    if state.rest().starts_with(&self.config.variable_end) {
133                        state.advance(self.config.variable_end.len());
134                        let end = state.get_position();
135                        state.add_token(HandlebarsTokenType::Comment, start, end);
136                        return true;
137                    }
138                    state.advance(1);
139                }
140                return true;
141            }
142        }
143        false
144    }
145
146    fn lex_handlebars_expression<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
147        let start = state.get_position();
148        let rest = state.rest();
149
150        if rest.starts_with(&self.config.unescaped_start) {
151            state.advance(self.config.unescaped_start.len());
152            state.add_token(HandlebarsTokenType::OpenUnescaped, start, state.get_position());
153            true
154        }
155        else if rest.starts_with(&self.config.variable_start) {
156            state.advance(self.config.variable_start.len());
157            state.add_token(HandlebarsTokenType::Open, start, state.get_position());
158            true
159        }
160        else if rest.starts_with(&self.config.unescaped_end) {
161            state.advance(self.config.unescaped_end.len());
162            state.add_token(HandlebarsTokenType::CloseUnescaped, start, state.get_position());
163            true
164        }
165        else if rest.starts_with(&self.config.variable_end) {
166            state.advance(self.config.variable_end.len());
167            state.add_token(HandlebarsTokenType::Close, start, state.get_position());
168            true
169        }
170        else {
171            false
172        }
173    }
174
175    fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
176        let config = if state.current() == Some('"') {
177            &*HB_STRING_DOUBLE
178        }
179        else if state.current() == Some('\'') {
180            &*HB_STRING_SINGLE
181        }
182        else {
183            return false;
184        };
185
186        config.scan(state, HandlebarsTokenType::StringLiteral)
187    }
188
189    fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
190        if let Some(c) = state.current() {
191            if c.is_ascii_digit() {
192                let start = state.get_position();
193                while let Some(c) = state.current() {
194                    if c.is_ascii_digit() || c == '.' { state.advance(1) } else { break }
195                }
196                let end = state.get_position();
197                state.add_token(HandlebarsTokenType::NumberLiteral, start, end);
198                true
199            }
200            else {
201                false
202            }
203        }
204        else {
205            false
206        }
207    }
208
209    fn lex_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
210        if let Some(c) = state.current() {
211            if c.is_alphabetic() || c == '_' || c == '@' {
212                let start = state.get_position();
213                while let Some(c) = state.current() {
214                    if c.is_alphanumeric() || c == '_' || c == '-' || c == '.' { state.advance(1) } else { break }
215                }
216                let end = state.get_position();
217                let text = state.get_text_in(Range { start, end });
218                let kind = match text.as_ref() {
219                    "else" => HandlebarsTokenType::Else,
220                    "true" | "false" => HandlebarsTokenType::BooleanLiteral,
221                    _ => HandlebarsTokenType::Identifier,
222                };
223                state.add_token(kind, start, end);
224                true
225            }
226            else {
227                false
228            }
229        }
230        else {
231            false
232        }
233    }
234
235    fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
236        if let Some(c) = state.current() {
237            let start = state.get_position();
238            let kind = match c {
239                '(' => HandlebarsTokenType::LeftParen,
240                ')' => HandlebarsTokenType::RightParen,
241                '[' => HandlebarsTokenType::LeftBracket,
242                ']' => HandlebarsTokenType::RightBracket,
243                '=' => HandlebarsTokenType::Equal,
244                '|' => HandlebarsTokenType::Pipe,
245                '#' => HandlebarsTokenType::Hash,
246                '.' => HandlebarsTokenType::Dot,
247                '/' => HandlebarsTokenType::Slash,
248                '@' => HandlebarsTokenType::At,
249                '^' => HandlebarsTokenType::Caret,
250                _ => return false,
251            };
252            state.advance(1);
253            let end = state.get_position();
254            state.add_token(kind, start, end);
255            true
256        }
257        else {
258            false
259        }
260    }
261
262    fn lex_content<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
263        let start = state.get_position();
264        let mut count = 0;
265
266        while let Some(c) = state.current() {
267            let rest = state.rest();
268            if rest.starts_with(&self.config.variable_start) || rest.starts_with(&self.config.unescaped_start) {
269                break;
270            }
271            state.advance(c.len_utf8());
272            count += 1
273        }
274
275        if count > 0 {
276            let end = state.get_position();
277            state.add_token(HandlebarsTokenType::Content, start, end);
278            true
279        }
280        else {
281            false
282        }
283    }
284}