1use std::fmt;
9
/// One lexical unit of a template, as produced by [`tokenize`].
///
/// The syntax shown on each variant is the form the `Display` impl writes
/// for it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// A run of plain text that matched none of the template markers.
    Text(String),

    /// The trimmed body of a `{{ expr }}` block.
    EscapedExpr(String),

    /// The trimmed body of a `{!! expr !!}` block.
    RawExpr(String),

    /// The trimmed body of an `@{{ expr }}` block (a `{{ … }}` block prefixed
    /// with `@`).
    LiteralExpr(String),

    /// An `@@name` or `@@name(args)` directive (a directive prefixed with an
    /// extra `@`).
    LiteralDirective {
        /// Directive name without the leading `@@`.
        name: String,
        /// Text between the parentheses, when an argument list was present.
        args: Option<String>,
    },

    /// An `@name` or `@name(args)` directive.
    Directive {
        /// Directive name without the leading `@`.
        name: String,
        /// Text between the parentheses, when an argument list was present.
        args: Option<String>,
    },

    /// An opening component tag: `<x-name attr="value">` or, when
    /// `self_closing` is set, `<x-name attr="value" />`.
    ComponentOpen {
        /// Component name without the `x-` prefix.
        name: String,
        /// Attribute `(name, value)` pairs in source order.
        attrs: Vec<(String, String)>,
        /// Whether the tag ended with `/`.
        self_closing: bool,
    },

    /// A closing component tag: `</x-name>`.
    ComponentClose {
        /// Component name without the `x-` prefix.
        name: String,
    },
}
44
45impl fmt::Display for Token {
46 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
47 match self {
48 Token::Text(s) => write!(f, "{s}"),
49 Token::EscapedExpr(e) => write!(f, "{{{{ {e} }}}}"),
50 Token::RawExpr(e) => write!(f, "{{!! {e} !!}}"),
51 Token::LiteralExpr(e) => write!(f, "@{{{{ {e} }}}}"),
52 Token::LiteralDirective {
53 name,
54 args: Some(a),
55 } => write!(f, "@@{name}({a})"),
56 Token::LiteralDirective { name, args: None } => write!(f, "@@{name}"),
57 Token::Directive {
58 name,
59 args: Some(a),
60 } => write!(f, "@{name}({a})"),
61 Token::Directive { name, args: None } => write!(f, "@{name}"),
62 Token::ComponentOpen {
63 name,
64 attrs,
65 self_closing,
66 } => {
67 write!(f, "<x-{name}")?;
68 for (k, v) in attrs {
69 write!(f, " {k}=\"{v}\"")?;
70 }
71 if *self_closing {
72 write!(f, " />")
73 } else {
74 write!(f, ">")
75 }
76 }
77 Token::ComponentClose { name } => write!(f, "</x-{name}>"),
78 }
79 }
80}
81
82pub fn tokenize(input: &str) -> Vec<Token> {
83 let mut tokens = Vec::new();
84 let bytes = input.as_bytes();
85 let mut i = 0;
86 let mut text_start = 0;
87
88 while i < bytes.len() {
89 if i + 2 < bytes.len() && bytes[i] == b'@' && bytes[i + 1] == b'{' && bytes[i + 2] == b'{' {
93 flush_text(input, text_start, i, &mut tokens);
94 if let Some(end) = find_close(&input[i + 3..], "}}") {
95 let expr = input[i + 3..i + 3 + end].trim().to_string();
96 tokens.push(Token::LiteralExpr(expr));
97 i += 3 + end + 2;
98 text_start = i;
99 continue;
100 }
101 }
102
103 if i + 1 < bytes.len()
105 && bytes[i] == b'@'
106 && bytes[i + 1] == b'@'
107 && i + 2 < bytes.len()
108 && (bytes[i + 2].is_ascii_alphabetic() || bytes[i + 2] == b'_')
109 {
110 flush_text(input, text_start, i, &mut tokens);
111 let dir_start = i + 2;
112 let mut dir_end = dir_start;
113 while dir_end < bytes.len()
114 && (bytes[dir_end].is_ascii_alphanumeric() || bytes[dir_end] == b'_')
115 {
116 dir_end += 1;
117 }
118 let name = input[dir_start..dir_end].to_string();
119 let mut args = None;
120 let mut new_i = dir_end;
121 if dir_end < bytes.len() && bytes[dir_end] == b'(' {
122 if let Some(close_offset) = find_matching_paren(&input[dir_end..]) {
123 args = Some(input[dir_end + 1..dir_end + close_offset].to_string());
124 new_i = dir_end + close_offset + 1;
125 }
126 }
127 tokens.push(Token::LiteralDirective { name, args });
128 i = new_i;
129 text_start = i;
130 continue;
131 }
132
133 if i + 1 < bytes.len()
135 && bytes[i] == b'{'
136 && bytes[i + 1] == b'{'
137 && !(i + 2 < bytes.len() && bytes[i + 2] == b'-')
138 {
139 flush_text(input, text_start, i, &mut tokens);
140 if let Some(end) = find_close(&input[i + 2..], "}}") {
141 let expr = input[i + 2..i + 2 + end].trim().to_string();
142 tokens.push(Token::EscapedExpr(expr));
143 i += 2 + end + 2;
144 text_start = i;
145 continue;
146 }
147 }
148
149 if i + 2 < bytes.len() && bytes[i] == b'{' && bytes[i + 1] == b'!' && bytes[i + 2] == b'!' {
151 flush_text(input, text_start, i, &mut tokens);
152 if let Some(end) = find_close(&input[i + 3..], "!!}") {
153 let expr = input[i + 3..i + 3 + end].trim().to_string();
154 tokens.push(Token::RawExpr(expr));
155 i += 3 + end + 3;
156 text_start = i;
157 continue;
158 }
159 }
160
161 if bytes[i] == b'@'
163 && i + 1 < bytes.len()
164 && (bytes[i + 1].is_ascii_alphabetic() || bytes[i + 1] == b'_')
165 {
166 flush_text(input, text_start, i, &mut tokens);
167 let dir_start = i + 1;
168 let mut dir_end = dir_start;
169 while dir_end < bytes.len()
170 && (bytes[dir_end].is_ascii_alphanumeric() || bytes[dir_end] == b'_')
171 {
172 dir_end += 1;
173 }
174 let name = input[dir_start..dir_end].to_string();
175 let mut args = None;
176 let mut new_i = dir_end;
177 if dir_end < bytes.len() && bytes[dir_end] == b'(' {
178 if let Some(close_offset) = find_matching_paren(&input[dir_end..]) {
179 args = Some(input[dir_end + 1..dir_end + close_offset].to_string());
180 new_i = dir_end + close_offset + 1;
181 }
182 }
183 tokens.push(Token::Directive { name, args });
184 i = new_i;
185 text_start = i;
186 continue;
187 }
188
189 if bytes[i] == b'<' && i + 2 < bytes.len() && bytes[i + 1] == b'x' && bytes[i + 2] == b'-' {
191 flush_text(input, text_start, i, &mut tokens);
192 let after = &input[i + 3..];
193 let name_end = after
195 .find(|c: char| c.is_whitespace() || c == '>' || c == '/')
196 .unwrap_or(after.len());
197 let name = after[..name_end].to_string();
198 let rest_start = i + 3 + name_end;
199 let close_offset = input[rest_start..]
200 .find('>')
201 .unwrap_or(input.len() - rest_start);
202 let tag_inner = &input[rest_start..rest_start + close_offset];
203 let self_closing = tag_inner.ends_with('/');
204 let attrs = parse_attrs(tag_inner.trim_end_matches('/'));
205 tokens.push(Token::ComponentOpen {
206 name,
207 attrs,
208 self_closing,
209 });
210 i = rest_start + close_offset + 1;
211 text_start = i;
212 continue;
213 }
214
215 if bytes[i] == b'<'
217 && i + 3 < bytes.len()
218 && bytes[i + 1] == b'/'
219 && bytes[i + 2] == b'x'
220 && bytes[i + 3] == b'-'
221 {
222 flush_text(input, text_start, i, &mut tokens);
223 let after = &input[i + 4..];
224 let name_end = after.find('>').unwrap_or(after.len());
225 let name = after[..name_end].trim().to_string();
226 tokens.push(Token::ComponentClose { name });
227 i += 4 + name_end + 1;
228 text_start = i;
229 continue;
230 }
231
232 i += 1;
233 }
234
235 flush_text(input, text_start, bytes.len(), &mut tokens);
236 tokens
237}
238
239fn flush_text(input: &str, start: usize, end: usize, tokens: &mut Vec<Token>) {
240 if end > start {
241 tokens.push(Token::Text(input[start..end].to_string()));
242 }
243}
244
/// Returns the byte offset of the first occurrence of `needle` in `s`, or
/// `None` when `needle` does not occur.
fn find_close(s: &str, needle: &str) -> Option<usize> {
    // The text before the first match is exactly as long as the match offset.
    s.split_once(needle).map(|(before, _)| before.len())
}
248
/// Finds the `)` that balances the `(` at the start of `s`.
///
/// Returns the byte index of that closing parenthesis, or `None` when `s`
/// does not start with `(` or the parenthesis is never balanced. Parentheses
/// inside single- or double-quoted strings are ignored. Inside a string a
/// backslash escapes the next byte, so `\"` does not end the string while
/// `\\"` (an escaped backslash followed by a quote) does.
fn find_matching_paren(s: &str) -> Option<usize> {
    let bytes = s.as_bytes();
    if bytes.first() != Some(&b'(') {
        return None;
    }

    // Never underflows: it starts at 1 and we return as soon as it hits 0.
    let mut depth = 1usize;
    let mut in_string: Option<u8> = None;
    // Tracking escape state explicitly, rather than peeking at the previous
    // byte, is what lets a string that ends in an escaped backslash close.
    let mut escaped = false;

    for (i, &b) in bytes.iter().enumerate().skip(1) {
        if let Some(quote) = in_string {
            if escaped {
                escaped = false;
            } else if b == b'\\' {
                escaped = true;
            } else if b == quote {
                in_string = None;
            }
            continue;
        }
        match b {
            b'"' | b'\'' => in_string = Some(b),
            b'(' => depth += 1,
            b')' => {
                depth -= 1;
                if depth == 0 {
                    return Some(i);
                }
            }
            _ => {}
        }
    }
    None
}
278
/// Parses the attribute portion of a component tag into `(name, value)`
/// pairs, in source order.
///
/// Attributes are separated by whitespace. Three forms are recognised:
///
/// * `name="value"` or `name='value'`: the value runs to the matching quote,
///   or to the end of `s` if the quote is never closed;
/// * `name=value`: the value runs to the next whitespace;
/// * `name`: a bare attribute, stored with an empty value.
///
/// A quote only delimits a value when it directly follows `=`.
fn parse_attrs(s: &str) -> Vec<(String, String)> {
    let mut attrs = Vec::new();
    let mut chars = s.chars().peekable();

    loop {
        // Skip the whitespace separating attributes.
        while chars.next_if(|c| c.is_whitespace()).is_some() {}
        if chars.peek().is_none() {
            break;
        }

        // Name: everything up to `=` (consumed) or whitespace (left in place).
        let mut name = String::new();
        let mut found_eq = false;
        while let Some(c) = chars.next_if(|c| !c.is_whitespace()) {
            if c == '=' {
                found_eq = true;
                break;
            }
            name.push(c);
        }

        let mut value = String::new();
        if found_eq {
            if let Some(quote) = chars.next_if(|&c| c == '"' || c == '\'') {
                // Quoted value: stop at, and consume, the matching quote.
                for c in chars.by_ref() {
                    if c == quote {
                        break;
                    }
                    value.push(c);
                }
            } else {
                // Unquoted value: runs to the next whitespace.
                while let Some(c) = chars.next_if(|c| !c.is_whitespace()) {
                    value.push(c);
                }
            }
        }
        attrs.push((name, value));
    }
    attrs
}
339
#[cfg(test)]
mod tests {
    use super::*;

    /// True for a text token that holds nothing but whitespace.
    fn is_blank(token: &Token) -> bool {
        match token {
            Token::Text(body) => body.trim().is_empty(),
            _ => false,
        }
    }

    /// Returns the first token that is not whitespace-only text.
    /// Panics when there is none.
    fn first_non_text(tokens: &[Token]) -> &Token {
        tokens
            .iter()
            .find(|t| !is_blank(t))
            .expect("no non-empty token")
    }

    #[test]
    fn at_double_brace_emits_literal_expr_not_escaped() {
        let toks = tokenize("@{{ handle }}");
        let ok = match first_non_text(&toks) {
            Token::LiteralExpr(expr) => expr == "handle",
            _ => false,
        };
        assert!(ok, "got: {toks:?}");
    }

    #[test]
    fn double_at_directive_emits_literal_directive() {
        let toks = tokenize("@@if(user)");
        let ok = match first_non_text(&toks) {
            Token::LiteralDirective { name, args } => {
                name == "if" && args.as_deref() == Some("user")
            }
            _ => false,
        };
        assert!(ok, "got: {toks:?}");
    }

    #[test]
    fn double_at_directive_without_args() {
        let toks = tokenize("@@verbatim");
        let ok = match first_non_text(&toks) {
            Token::LiteralDirective { name, args } => name == "verbatim" && args.is_none(),
            _ => false,
        };
        assert!(ok, "got: {toks:?}");
    }

    #[test]
    fn at_double_brace_does_not_consume_following_real_interpolation() {
        let toks = tokenize("@{{ '' }}{{ handle }}");
        let mut significant = toks.iter().filter(|t| !is_blank(t));
        let first = significant.next().expect("first");
        let second = significant.next().expect("second");

        let first_ok = match first {
            Token::LiteralExpr(expr) => expr == "''",
            _ => false,
        };
        assert!(first_ok, "first: {first:?}");

        let second_ok = match second {
            Token::EscapedExpr(expr) => expr == "handle",
            _ => false,
        };
        assert!(second_ok, "second: {second:?}");
    }

    #[test]
    fn regular_directive_still_works() {
        let toks = tokenize("@if(x)");
        let ok = match first_non_text(&toks) {
            Token::Directive { name, args } => name == "if" && args.as_deref() == Some("x"),
            _ => false,
        };
        assert!(ok, "got: {toks:?}");
    }

    #[test]
    fn regular_interpolation_still_works() {
        let toks = tokenize("{{ name }}");
        let ok = match first_non_text(&toks) {
            Token::EscapedExpr(expr) => expr == "name",
            _ => false,
        };
        assert!(ok, "got: {toks:?}");
    }
}