1#![doc = include_str!("readme.md")]
2use crate::language::HandlebarsLanguage;
3pub mod token_type;
4pub use token_type::HandlebarsTokenType;
5
6use oak_core::{
7 Lexer, LexerCache, LexerState, OakError, Range,
8 lexer::{LexOutput, StringConfig, WhitespaceConfig},
9 source::Source,
10};
11use std::sync::LazyLock;
12
/// Shorthand for the `oak_core` lexer state specialised to [`HandlebarsLanguage`].
///
/// `'a` and `S` are forwarded unchanged to [`LexerState`]; every scanning method in
/// this module takes `&mut State<'a, S>`.
pub(crate) type State<'a, S> = LexerState<'a, S, HandlebarsLanguage>;
14
/// Whitespace scanner configuration, created on first use, with `unicode_whitespace` enabled.
// NOTE(review): whether this configuration also consumes line breaks depends on
// `WhitespaceConfig::scan` in oak_core — confirm before relying on `Newline` tokens.
static HB_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
/// String scanner configuration for `"`-quoted literals, with `\` as the escape character.
static HB_STRING_DOUBLE: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
/// String scanner configuration for `'`-quoted literals, with `\` as the escape character.
static HB_STRING_SINGLE: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['\''], escape: Some('\\') });
19
/// Lexer for Handlebars templates.
///
/// Holds only a borrowed [`HandlebarsLanguage`]; the delimiter strings used while
/// scanning (`variable_start`, `variable_end`, `unescaped_start`, `unescaped_end`)
/// are read from it.
#[derive(Clone)]
pub struct HandlebarsLexer<'config> {
    /// Language configuration supplying the expression delimiters.
    config: &'config HandlebarsLanguage,
}
25
26impl<'config> Lexer<HandlebarsLanguage> for HandlebarsLexer<'config> {
27 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<HandlebarsLanguage>) -> LexOutput<HandlebarsLanguage> {
28 let mut state: State<'_, S> = LexerState::new(source);
29 let result = self.run(&mut state);
30 if result.is_ok() {
31 state.add_eof()
32 }
33 state.finish_with_cache(result, cache)
34 }
35}
36
37impl<'config> HandlebarsLexer<'config> {
    /// Creates a lexer that borrows `config` for the lifetime `'config`.
    pub fn new(config: &'config HandlebarsLanguage) -> Self {
        Self { config }
    }
42
43 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
44 while state.not_at_end() {
45 let safe_point = state.get_position();
46
47 if self.skip_whitespace(state) {
48 continue;
49 }
50
51 if self.skip_newline(state) {
52 continue;
53 }
54
55 if self.lex_comment(state) {
56 continue;
57 }
58
59 if self.lex_handlebars_expression(state) {
60 continue;
61 }
62
63 if self.lex_string_literal(state) {
64 continue;
65 }
66
67 if self.lex_number_literal(state) {
68 continue;
69 }
70
71 if self.lex_identifier(state) {
72 continue;
73 }
74
75 if self.lex_single_char_tokens(state) {
76 continue;
77 }
78
79 if self.lex_content(state) {
80 continue;
81 }
82
83 state.advance_if_dead_lock(safe_point)
84 }
85
86 Ok(())
87 }
88
    /// Scans whitespace at the cursor using the shared `HB_WHITESPACE` configuration,
    /// tagging it as [`HandlebarsTokenType::Whitespace`].
    ///
    /// Returns whatever `scan` reports; `run` treats `true` as "handled, start the next iteration".
    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        HB_WHITESPACE.scan(state, HandlebarsTokenType::Whitespace)
    }
92
93 fn skip_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
94 if state.current() == Some('\n') || state.current() == Some('\r') {
95 let start = state.get_position();
96 state.advance(1);
97 if state.current() == Some('\n') && state.peek() == Some('\r') {
98 state.advance(1)
99 }
100 let end = state.get_position();
101 state.add_token(HandlebarsTokenType::Newline, start, end);
102 true
103 }
104 else {
105 false
106 }
107 }
108
109 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
110 let rest = state.rest();
111 if rest.starts_with(&self.config.variable_start) {
112 let comment_rest = &rest[self.config.variable_start.len()..];
113 if comment_rest.starts_with("!--") {
114 let start = state.get_position();
115 state.advance(self.config.variable_start.len() + 3);
116 while state.not_at_end() {
117 let current_rest = state.rest();
118 if current_rest.starts_with("--") && current_rest[2..].starts_with(&self.config.variable_end) {
119 state.advance(2 + self.config.variable_end.len());
120 let end = state.get_position();
121 state.add_token(HandlebarsTokenType::Comment, start, end);
122 return true;
123 }
124 state.advance(1);
125 }
126 return true;
127 }
128 else if comment_rest.starts_with('!') {
129 let start = state.get_position();
130 state.advance(self.config.variable_start.len() + 1);
131 while state.not_at_end() {
132 if state.rest().starts_with(&self.config.variable_end) {
133 state.advance(self.config.variable_end.len());
134 let end = state.get_position();
135 state.add_token(HandlebarsTokenType::Comment, start, end);
136 return true;
137 }
138 state.advance(1);
139 }
140 return true;
141 }
142 }
143 false
144 }
145
146 fn lex_handlebars_expression<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
147 let start = state.get_position();
148 let rest = state.rest();
149
150 if rest.starts_with(&self.config.unescaped_start) {
151 state.advance(self.config.unescaped_start.len());
152 state.add_token(HandlebarsTokenType::OpenUnescaped, start, state.get_position());
153 true
154 }
155 else if rest.starts_with(&self.config.variable_start) {
156 state.advance(self.config.variable_start.len());
157 state.add_token(HandlebarsTokenType::Open, start, state.get_position());
158 true
159 }
160 else if rest.starts_with(&self.config.unescaped_end) {
161 state.advance(self.config.unescaped_end.len());
162 state.add_token(HandlebarsTokenType::CloseUnescaped, start, state.get_position());
163 true
164 }
165 else if rest.starts_with(&self.config.variable_end) {
166 state.advance(self.config.variable_end.len());
167 state.add_token(HandlebarsTokenType::Close, start, state.get_position());
168 true
169 }
170 else {
171 false
172 }
173 }
174
175 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
176 let config = if state.current() == Some('"') {
177 &*HB_STRING_DOUBLE
178 }
179 else if state.current() == Some('\'') {
180 &*HB_STRING_SINGLE
181 }
182 else {
183 return false;
184 };
185
186 config.scan(state, HandlebarsTokenType::StringLiteral)
187 }
188
189 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
190 if let Some(c) = state.current() {
191 if c.is_ascii_digit() {
192 let start = state.get_position();
193 while let Some(c) = state.current() {
194 if c.is_ascii_digit() || c == '.' { state.advance(1) } else { break }
195 }
196 let end = state.get_position();
197 state.add_token(HandlebarsTokenType::NumberLiteral, start, end);
198 true
199 }
200 else {
201 false
202 }
203 }
204 else {
205 false
206 }
207 }
208
209 fn lex_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
210 if let Some(c) = state.current() {
211 if c.is_alphabetic() || c == '_' || c == '@' {
212 let start = state.get_position();
213 while let Some(c) = state.current() {
214 if c.is_alphanumeric() || c == '_' || c == '-' || c == '.' { state.advance(1) } else { break }
215 }
216 let end = state.get_position();
217 let text = state.get_text_in(Range { start, end });
218 let kind = match text.as_ref() {
219 "else" => HandlebarsTokenType::Else,
220 "true" | "false" => HandlebarsTokenType::BooleanLiteral,
221 _ => HandlebarsTokenType::Identifier,
222 };
223 state.add_token(kind, start, end);
224 true
225 }
226 else {
227 false
228 }
229 }
230 else {
231 false
232 }
233 }
234
235 fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
236 if let Some(c) = state.current() {
237 let start = state.get_position();
238 let kind = match c {
239 '(' => HandlebarsTokenType::LeftParen,
240 ')' => HandlebarsTokenType::RightParen,
241 '[' => HandlebarsTokenType::LeftBracket,
242 ']' => HandlebarsTokenType::RightBracket,
243 '=' => HandlebarsTokenType::Equal,
244 '|' => HandlebarsTokenType::Pipe,
245 '#' => HandlebarsTokenType::Hash,
246 '.' => HandlebarsTokenType::Dot,
247 '/' => HandlebarsTokenType::Slash,
248 '@' => HandlebarsTokenType::At,
249 '^' => HandlebarsTokenType::Caret,
250 _ => return false,
251 };
252 state.advance(1);
253 let end = state.get_position();
254 state.add_token(kind, start, end);
255 true
256 }
257 else {
258 false
259 }
260 }
261
262 fn lex_content<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
263 let start = state.get_position();
264 let mut count = 0;
265
266 while let Some(c) = state.current() {
267 let rest = state.rest();
268 if rest.starts_with(&self.config.variable_start) || rest.starts_with(&self.config.unescaped_start) {
269 break;
270 }
271 state.advance(c.len_utf8());
272 count += 1
273 }
274
275 if count > 0 {
276 let end = state.get_position();
277 state.add_token(HandlebarsTokenType::Content, start, end);
278 true
279 }
280 else {
281 false
282 }
283 }
284}