oak_handlebars/lexer/mod.rs

use crate::{kind::HandlebarsSyntaxKind, language::HandlebarsLanguage};
use oak_core::{
    IncrementalCache, Lexer, LexerState, OakError, Token,
    lexer::{LexOutput, StringConfig, WhitespaceConfig},
    source::Source,
};
use std::sync::LazyLock;

type State<S> = LexerState<S, HandlebarsLanguage>;

static HB_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });

static HB_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"', '\''], escape: Some('\\') });

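/// Hand-written lexer for Handlebars templates. It emits `HandlebarsSyntaxKind`
/// tokens for mustache delimiters (`{{ ... }}`), comments, string and number
/// literals, identifiers, punctuation, and raw template content.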
#[derive(Clone)]
pub struct HandlebarsLexer<'config> {
    config: &'config HandlebarsLanguage,
}

impl<'config> Lexer<HandlebarsLanguage> for HandlebarsLexer<'config> {
    fn lex_incremental(
        &self,
        source: impl Source,
        changed: usize,
        cache: IncrementalCache<HandlebarsLanguage>,
    ) -> LexOutput<HandlebarsLanguage> {
        let mut state = LexerState::new_with_cache(source, changed, cache);
        let result = self.run(&mut state);
        state.finish(result)
    }
}

impl<'config> HandlebarsLexer<'config> {
    pub fn new(config: &'config HandlebarsLanguage) -> Self {
        Self { config }
    }

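    /// Main lexing loop: at each position, tries the token rules in priority
    /// order (whitespace, newline, comment, handlebars delimiters, string,
    /// number, identifier, punctuation, raw content) and appends an `Eof`
    /// token once the end of the input is reached.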
    fn run<S: Source>(&self, state: &mut State<S>) -> Result<(), OakError> {
        while state.not_at_end() {
            let safe_point = state.get_position();

            if self.skip_whitespace(state) {
                continue;
            }

            if self.skip_newline(state) {
                continue;
            }

            if self.lex_comment(state) {
                continue;
            }

            if self.lex_handlebars_expression(state) {
                continue;
            }

            if self.lex_string_literal(state) {
                continue;
            }

            if self.lex_number_literal(state) {
                continue;
            }

            if self.lex_identifier(state) {
                continue;
            }

            if self.lex_single_char_tokens(state) {
                continue;
            }

            if self.lex_content(state) {
                continue;
            }

            state.safe_check(safe_point);
        }

        let eof_pos = state.get_position();
        state.add_token(HandlebarsSyntaxKind::Eof, eof_pos, eof_pos);
        Ok(())
    }

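    /// Consumes a run of whitespace using the shared `HB_WHITESPACE` config and
    /// emits a `Whitespace` token. Returns `false` if no whitespace is present.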
    fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
        match HB_WHITESPACE.scan(state.rest(), state.get_position(), HandlebarsSyntaxKind::Whitespace) {
            Some(token) => {
                state.advance_with(token);
                true
            }
            None => false,
        }
    }

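    /// Consumes a single line break (`\n`, `\r`, or a `\r\n` pair) and emits a
    /// `Newline` token.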
    fn skip_newline<S: Source>(&self, state: &mut State<S>) -> bool {
        let first = state.current();
        if first == Some('\n') || first == Some('\r') {
            let start = state.get_position();
            state.advance(1);
            // Fold a "\r\n" pair into a single Newline token.
            if first == Some('\r') && state.current() == Some('\n') {
                state.advance(1);
            }
            let end = state.get_position();
            state.add_token(HandlebarsSyntaxKind::Newline, start, end);
            true
        }
        else {
            false
        }
    }

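    /// Lexes Handlebars comments: the long form `{{!-- ... --}}` and the short
    /// form `{{! ... }}`. The whole comment, including delimiters, becomes one
    /// `Comment` token.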
    fn lex_comment<S: Source>(&self, state: &mut State<S>) -> bool {
        let rest = state.rest();
        if rest.starts_with("{{!--") {
            let start = state.get_position();
            state.advance(5);
            while state.not_at_end() {
                if state.rest().starts_with("--}}") {
                    state.advance(4);
                    break;
                }
                state.advance(1);
            }

            let end = state.get_position();
            state.add_token(HandlebarsSyntaxKind::Comment, start, end);
            true
        }
        else if rest.starts_with("{{!") {
            let start = state.get_position();
            state.advance(3);
            while state.not_at_end() {
                if state.rest().starts_with("}}") {
                    state.advance(2);
                    break;
                }
                state.advance(1);
            }

            let end = state.get_position();
            state.add_token(HandlebarsSyntaxKind::Comment, start, end);
            true
        }
        else {
            false
        }
    }

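    /// Lexes mustache delimiters, longest match first: `{{{`, `{{#`, `{{/`,
    /// `{{>`, `{{`, `}}}`, then `}}`.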
    fn lex_handlebars_expression<S: Source>(&self, state: &mut State<S>) -> bool {
        let rest = state.rest();
        let start = state.get_position();

        if rest.starts_with("{{{") {
            state.advance(3);
            let end = state.get_position();
            state.add_token(HandlebarsSyntaxKind::OpenUnescaped, start, end);
            true
        }
        else if rest.starts_with("{{#") {
            state.advance(3);
            let end = state.get_position();
            state.add_token(HandlebarsSyntaxKind::OpenBlock, start, end);
            true
        }
        else if rest.starts_with("{{/") {
            state.advance(3);
            let end = state.get_position();
            state.add_token(HandlebarsSyntaxKind::CloseBlock, start, end);
            true
        }
        else if rest.starts_with("{{>") {
            state.advance(3);
            let end = state.get_position();
            state.add_token(HandlebarsSyntaxKind::OpenPartial, start, end);
            true
        }
        else if rest.starts_with("{{") {
            state.advance(2);
            let end = state.get_position();
            state.add_token(HandlebarsSyntaxKind::Open, start, end);
            true
        }
        else if rest.starts_with("}}}") {
            state.advance(3);
            let end = state.get_position();
            state.add_token(HandlebarsSyntaxKind::CloseUnescaped, start, end);
            true
        }
        else if rest.starts_with("}}") {
            state.advance(2);
            let end = state.get_position();
            state.add_token(HandlebarsSyntaxKind::Close, start, end);
            true
        }
        else {
            false
        }
    }

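    /// Lexes a quoted string via the shared `HB_STRING` config. The scanner is
    /// given offset 0, so the returned span is shifted to the lexer's current
    /// absolute position before the token is recorded.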
    fn lex_string_literal<S: Source>(&self, state: &mut State<S>) -> bool {
        match HB_STRING.scan(state.rest(), 0, HandlebarsSyntaxKind::StringLiteral) {
            Some(token) => {
                use std::range::Range;

                let adjusted_token = Token {
                    kind: token.kind,
                    span: Range {
                        start: token.span.start + state.get_position(),
                        end: token.span.end + state.get_position(),
                    },
                };
                state.advance_with(adjusted_token);
                true
            }
            None => false,
        }
    }

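    /// Lexes a number literal: a leading ASCII digit followed by further digits
    /// and `.` characters.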
    fn lex_number_literal<S: Source>(&self, state: &mut State<S>) -> bool {
        if let Some(c) = state.current() {
            if c.is_ascii_digit() {
                let start = state.get_position();
                while let Some(c) = state.current() {
                    if c.is_ascii_digit() || c == '.' {
                        state.advance(1);
                    }
                    else {
                        break;
                    }
                }
                let end = state.get_position();
                state.add_token(HandlebarsSyntaxKind::NumberLiteral, start, end);
                true
            }
            else {
                false
            }
        }
        else {
            false
        }
    }

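    /// Lexes an identifier or path segment: starts with an alphabetic character
    /// or `_`, then continues over alphanumerics, `_`, `-`, and `.`.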
    fn lex_identifier<S: Source>(&self, state: &mut State<S>) -> bool {
        if let Some(c) = state.current() {
            if c.is_alphabetic() || c == '_' {
                let start = state.get_position();
                while let Some(c) = state.current() {
                    if c.is_alphanumeric() || c == '_' || c == '-' || c == '.' {
                        state.advance(1);
                    }
                    else {
                        break;
                    }
                }
                let end = state.get_position();
                state.add_token(HandlebarsSyntaxKind::Identifier, start, end);
                true
            }
            else {
                false
            }
        }
        else {
            false
        }
    }

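    /// Lexes single-character punctuation used inside expressions: parentheses,
    /// brackets, `=`, `|`, `#`, `.`, and `/`.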
    fn lex_single_char_tokens<S: Source>(&self, state: &mut State<S>) -> bool {
        if let Some(c) = state.current() {
            let start = state.get_position();
            let kind = match c {
                '(' => HandlebarsSyntaxKind::LeftParen,
                ')' => HandlebarsSyntaxKind::RightParen,
                '[' => HandlebarsSyntaxKind::LeftBracket,
                ']' => HandlebarsSyntaxKind::RightBracket,
                '=' => HandlebarsSyntaxKind::Equal,
                '|' => HandlebarsSyntaxKind::Pipe,
                '#' => HandlebarsSyntaxKind::Hash,
                '.' => HandlebarsSyntaxKind::Dot,
                '/' => HandlebarsSyntaxKind::Slash,
                _ => return false,
            };
            state.advance(1);
            let end = state.get_position();
            state.add_token(kind, start, end);
            true
        }
        else {
            false
        }
    }

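    /// Fallback rule: consumes raw template text up to the next `{{` (or the
    /// end of input) and emits it as a single `Content` token.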
    fn lex_content<S: Source>(&self, state: &mut State<S>) -> bool {
        let start = state.get_position();
        let mut has_content = false;

        while state.not_at_end() {
            let rest = state.rest();
            if rest.starts_with("{{") {
                break;
            }
            state.advance(1);
            has_content = true;
        }

        if has_content {
            let end = state.get_position();
            state.add_token(HandlebarsSyntaxKind::Content, start, end);
            true
        }
        else {
            false
        }
    }
}
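
// A minimal usage sketch, not part of the original module. It assumes that
// `HandlebarsLanguage` implements `Default`, that `&str` implements the
// `Source` trait, and that `IncrementalCache::default()` is how a fresh cache
// is created; adjust those pieces to the actual `oak_core` API.
#[cfg(test)]
mod tests {
    use super::HandlebarsLexer;
    use crate::language::HandlebarsLanguage;
    use oak_core::{IncrementalCache, Lexer};

    #[test]
    fn lexes_a_simple_template() {
        // Assumption: HandlebarsLanguage::default() yields a usable config.
        let config = HandlebarsLanguage::default();
        let lexer = HandlebarsLexer::new(&config);

        // Assumption: &str implements Source and IncrementalCache implements
        // Default. `changed = 0` with a fresh cache lexes the whole input.
        let output = lexer.lex_incremental("Hello {{name}}!", 0, IncrementalCache::default());

        // The exact shape of LexOutput is defined by oak_core; the run above
        // should at minimum have produced a token stream ending in Eof.
        let _ = output;
    }
}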