1use crate::error::{Error, Result, Span};
4
/// Every kind of token the lexer can produce, including the payload-carrying
/// literal/identifier variants and the fixed punctuation/keyword variants.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
    /// Literal template text outside of any tag.
    RawText(String),
    /// Verbatim contents of a `{#raw}...{/raw}` block.
    RawBody(String),
    /// Text between `{!` and `!}`.
    CommentBody(String),

    // Tag delimiters.
    BlockOpen,
    ContinueOpen,
    BlockClose,
    SpecialOpen,
    CommentOpen,
    ExprOpen,
    ExprOpenRaw,
    Close,
    CommentClose,

    // Keywords.
    KwIf,
    KwElse,
    KwEach,
    KwAs,
    KwSnippet,
    KwRaw,
    KwRender,
    KwConst,
    KwInclude,
    KwDebug,
    KwIs,
    KwNot,
    KwIn,

    // Literals.
    StringLit(String),
    IntLit(i64),
    FloatLit(f64),
    True,
    False,
    Null,

    /// Any non-keyword name.
    Ident(String),

    // Operators and punctuation.
    Pipe,
    Or,
    And,
    Question,
    NullCoalesce,
    Colon,
    Eq,
    Neq,
    Assign,
    Lt,
    Gt,
    Lte,
    Gte,
    Add,
    Sub,
    Mul,
    Div,
    Mod,
    Bang,
    Dot,
    LParen,
    RParen,
    LBracket,
    RBracket,
    LBraceD,
    RBraceD,
    Comma,

    /// End of input; always the final token of a stream.
    Eof,
}
80
/// Human-readable description of each token kind, suitable for embedding in
/// diagnostics (e.g. "expected X, found Y").
impl std::fmt::Display for TokenKind {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Payload-carrying variants return early so the payload can be
        // interpolated; every other variant maps to a static string.
        let s = match self {
            TokenKind::RawText(_) => "raw text",
            TokenKind::RawBody(_) => "raw block content",
            TokenKind::CommentBody(_) => "comment content",

            TokenKind::BlockOpen => "'{#'",
            TokenKind::ContinueOpen => "'{:'",
            TokenKind::BlockClose => "'{/'",
            TokenKind::SpecialOpen => "'{@'",
            TokenKind::CommentOpen => "'{!'",
            TokenKind::ExprOpen => "'{='",
            TokenKind::ExprOpenRaw => "'{~'",
            TokenKind::Close => "'}'",
            TokenKind::CommentClose => "'!}'",

            TokenKind::KwIf => "keyword 'if'",
            TokenKind::KwElse => "keyword 'else'",
            TokenKind::KwEach => "keyword 'each'",
            TokenKind::KwAs => "keyword 'as'",
            TokenKind::KwSnippet => "keyword 'snippet'",
            TokenKind::KwRaw => "keyword 'raw'",
            TokenKind::KwRender => "keyword 'render'",
            TokenKind::KwConst => "keyword 'const'",
            TokenKind::KwInclude => "keyword 'include'",
            TokenKind::KwDebug => "keyword 'debug'",
            TokenKind::KwIs => "keyword 'is'",
            TokenKind::KwNot => "keyword 'not'",
            TokenKind::KwIn => "keyword 'in'",

            TokenKind::True => "'true'",
            TokenKind::False => "'false'",
            TokenKind::Null => "'null'",

            TokenKind::Pipe => "'|'",
            TokenKind::Or => "'||'",
            TokenKind::And => "'&&'",
            TokenKind::Question => "'?'",
            TokenKind::NullCoalesce => "'??'",
            TokenKind::Colon => "':'",
            TokenKind::Eq => "'=='",
            TokenKind::Neq => "'!='",
            TokenKind::Assign => "'='",
            TokenKind::Lt => "'<'",
            TokenKind::Gt => "'>'",
            TokenKind::Lte => "'<='",
            TokenKind::Gte => "'>='",
            TokenKind::Add => "'+'",
            TokenKind::Sub => "'-'",
            TokenKind::Mul => "'*'",
            TokenKind::Div => "'/'",
            TokenKind::Mod => "'%'",
            TokenKind::Bang => "'!'",
            TokenKind::Dot => "'.'",

            TokenKind::LParen => "'('",
            TokenKind::RParen => "')'",
            TokenKind::LBracket => "'['",
            TokenKind::RBracket => "']'",
            TokenKind::LBraceD => "'{'",
            TokenKind::RBraceD => "'}'",
            TokenKind::Comma => "','",

            TokenKind::Eof => "end of template",

            TokenKind::StringLit(s) => return write!(f, "string '{s}'"),
            TokenKind::IntLit(n) => return write!(f, "integer '{n}'"),
            TokenKind::FloatLit(n) => return write!(f, "float '{n}'"),
            TokenKind::Ident(s) => return write!(f, "'{s}'"),
        };
        f.write_str(s)
    }
}
156
/// A single lexed token: its kind plus the source position it started at.
#[derive(Debug, Clone)]
pub struct Token {
    // What was lexed (possibly carrying a payload, e.g. a string literal).
    pub kind: TokenKind,
    // Line/column/byte-offset of the token's first character.
    pub span: Span,
}
163
/// Hand-written, character-at-a-time lexer over a template source string.
pub struct Lexer {
    // Source decoded into chars up front for O(1) indexed lookahead.
    chars: Vec<char>,
    // Index of the next unread char in `chars`.
    pos: usize,
    // 1-based line of the next char (incremented on '\n').
    line: u32,
    // 1-based column of the next char (reset to 1 after '\n').
    col: u32,
    // Byte offset of the next char in the original UTF-8 source.
    offset: usize,
}
172
impl Lexer {
    /// Build a lexer over `src`, positioned at line 1, column 1, byte offset 0.
    pub fn new(src: &str) -> Self {
        Lexer {
            chars: src.chars().collect(),
            pos: 0,
            line: 1,
            col: 1,
            offset: 0,
        }
    }

    /// Lex the entire template and return the token stream, always
    /// terminated by a single `Eof` token carrying the end-of-input span.
    pub fn tokenize(mut self) -> Result<Vec<Token>> {
        let mut tokens = Vec::new();
        self.lex_template(&mut tokens)?;
        tokens.push(Token {
            kind: TokenKind::Eof,
            span: self.span(),
        });
        Ok(tokens)
    }

    /// Span (line, column, byte offset) of the next unread character.
    fn span(&self) -> Span {
        Span::new(self.line, self.col, self.offset)
    }

    /// Look at the next character without consuming it.
    fn peek(&self) -> Option<char> {
        self.chars.get(self.pos).copied()
    }

    /// Look `offset` characters ahead of the current position.
    fn peek_at(&self, offset: usize) -> Option<char> {
        self.chars.get(self.pos + offset).copied()
    }

    /// True if the source starting `offset` chars ahead matches `s` exactly.
    /// (Also true for an empty `s`.)
    fn matches_at(&self, offset: usize, s: &str) -> bool {
        s.chars()
            .enumerate()
            .all(|(i, c)| self.peek_at(offset + i) == Some(c))
    }

    /// Consume one character, keeping line/column/byte-offset bookkeeping
    /// in sync. Returns `None` at end of input.
    fn advance(&mut self) -> Option<char> {
        let c = self.chars.get(self.pos).copied()?;
        self.pos += 1;
        self.offset += c.len_utf8();
        if c == '\n' {
            self.line += 1;
            self.col = 1;
        } else {
            self.col += 1;
        }
        Some(c)
    }

    /// Consume the next character only if it equals `c`; reports whether it did.
    fn advance_if(&mut self, c: char) -> bool {
        if self.peek() == Some(c) {
            self.advance();
            true
        } else {
            false
        }
    }

    /// True once every character has been consumed.
    fn at_end(&self) -> bool {
        self.pos >= self.chars.len()
    }

    /// Top-level loop: alternates between literal text and `{...}` tags,
    /// dispatching on the character immediately after a `{`.
    fn lex_template(&mut self, tokens: &mut Vec<Token>) -> Result<()> {
        while !self.at_end() {
            if self.peek() != Some('{') {
                self.lex_raw_text(tokens);
                continue;
            }

            let sigil = self.peek_at(1);

            match sigil {
                Some('#') => {
                    if self.matches_at(2, "raw}") {
                        // `{#raw}` bodies are captured verbatim rather than
                        // tag-lexed.
                        self.lex_raw_block(tokens)?;
                    } else {
                        let span = self.span();
                        // Consume the two sigil chars `{#`.
                        self.advance();
                        self.advance();
                        tokens.push(mk(TokenKind::BlockOpen, span));
                        self.lex_tag(tokens)?;
                    }
                }
                Some(':') => {
                    let span = self.span();
                    self.advance();
                    self.advance();
                    tokens.push(mk(TokenKind::ContinueOpen, span));
                    self.lex_tag(tokens)?;
                }
                Some('/') => {
                    let span = self.span();
                    self.advance();
                    self.advance();
                    tokens.push(mk(TokenKind::BlockClose, span));
                    self.lex_tag(tokens)?;
                }
                Some('@') => {
                    let span = self.span();
                    self.advance();
                    self.advance();
                    tokens.push(mk(TokenKind::SpecialOpen, span));
                    self.lex_tag(tokens)?;
                }
                Some('!') => {
                    let span = self.span();
                    self.advance();
                    self.advance();
                    tokens.push(mk(TokenKind::CommentOpen, span));
                    self.lex_comment(tokens)?;
                }
                Some('=') => {
                    let span = self.span();
                    self.advance();
                    self.advance();
                    tokens.push(mk(TokenKind::ExprOpen, span));
                    self.lex_tag(tokens)?;
                }
                Some('~') => {
                    let span = self.span();
                    self.advance();
                    self.advance();
                    tokens.push(mk(TokenKind::ExprOpenRaw, span));
                    self.lex_tag(tokens)?;
                }
                Some('\\') => {
                    let escaped = self.peek_at(2);
                    if matches!(escaped, Some('=') | Some('~')) {
                        // `{\=` / `{\~`: the backslash escapes the tag sigil.
                        // Drop the backslash and emit a literal `{=` / `{~`
                        // plus everything up to the next `{` as raw text.
                        let span = self.span();
                        self.advance(); // '{'
                        self.advance(); // '\'
                        let sigil = self.advance().unwrap();
                        let mut text = format!("{{{sigil}");
                        while !self.at_end() && self.peek() != Some('{') {
                            text.push(self.advance().unwrap());
                        }
                        tokens.push(mk(TokenKind::RawText(text), span));
                    } else {
                        // Any other `{\...` is literal text; only the `{` is
                        // consumed here, so the backslash itself is kept.
                        let span = self.span();
                        self.advance();
                        let mut text = String::from("{");
                        while !self.at_end() && self.peek() != Some('{') {
                            text.push(self.advance().unwrap());
                        }
                        tokens.push(mk(TokenKind::RawText(text), span));
                    }
                }
                _ => {
                    // A lone `{` with no recognized sigil (including `{` at
                    // end of input) is literal text up to the next `{`.
                    let span = self.span();
                    self.advance();
                    let mut text = String::from("{");
                    while !self.at_end() && self.peek() != Some('{') {
                        text.push(self.advance().unwrap());
                    }
                    tokens.push(mk(TokenKind::RawText(text), span));
                }
            }
        }
        Ok(())
    }

    /// Consume literal text up to (not including) the next `{` or end of
    /// input. Emits nothing for an empty run.
    fn lex_raw_text(&mut self, tokens: &mut Vec<Token>) {
        let span = self.span();
        let mut text = String::new();
        while !self.at_end() && self.peek() != Some('{') {
            text.push(self.advance().unwrap());
        }
        if !text.is_empty() {
            tokens.push(mk(TokenKind::RawText(text), span));
        }
    }

    /// Lex a `{#raw}...{/raw}` block. The caller has already verified the
    /// opening `{#raw}`. Emits the same token shapes a normal block would
    /// (BlockOpen/KwRaw/Close ... BlockClose/KwRaw/Close) with the body
    /// captured verbatim as a single `RawBody` token.
    fn lex_raw_block(&mut self, tokens: &mut Vec<Token>) -> Result<()> {
        let open_span = self.span();
        // Consume the 6 characters of "{#raw}".
        for _ in 0..6 {
            self.advance();
        }
        tokens.push(mk(TokenKind::BlockOpen, open_span));
        tokens.push(mk(TokenKind::KwRaw, self.span()));
        tokens.push(mk(TokenKind::Close, self.span()));

        let body_span = self.span();
        let mut body = String::new();
        loop {
            if self.at_end() {
                return Err(Error::LexError {
                    message: "Unclosed {#raw} block — expected {/raw}".to_string(),
                    span: self.span(),
                });
            }
            if self.matches_at(0, "{/raw}") {
                break;
            }
            body.push(self.advance().unwrap());
        }
        tokens.push(mk(TokenKind::RawBody(body), body_span));

        let close_span = self.span();
        // Consume the 6 characters of "{/raw}".
        for _ in 0..6 {
            self.advance();
        }
        tokens.push(mk(TokenKind::BlockClose, close_span.clone()));
        tokens.push(mk(TokenKind::KwRaw, close_span.clone()));
        tokens.push(mk(TokenKind::Close, close_span));
        Ok(())
    }

    /// Lex the interior of a tag up to and including its closing `}`.
    /// `brace_depth` tracks nested `{`/`}` pairs inside the tag (emitted as
    /// LBraceD/RBraceD tokens) so an inner `}` is not taken as the tag
    /// terminator.
    fn lex_tag(&mut self, tokens: &mut Vec<Token>) -> Result<()> {
        let mut brace_depth: usize = 0;
        loop {
            self.skip_ws();
            if self.at_end() {
                return Err(Error::LexError {
                    message: "Unexpected end of input inside tag".to_string(),
                    span: self.span(),
                });
            }

            // Only a `}` at depth 0 closes the tag.
            if brace_depth == 0 && self.peek() == Some('}') {
                let span = self.span();
                self.advance();
                tokens.push(mk(TokenKind::Close, span));
                return Ok(());
            }

            if self.peek() == Some('{') {
                brace_depth += 1;
                let span = self.span();
                self.advance();
                tokens.push(mk(TokenKind::LBraceD, span));
                continue;
            }

            // Reached only when brace_depth > 0 (depth 0 returned above),
            // so the subtraction cannot underflow.
            if self.peek() == Some('}') {
                brace_depth -= 1;
                let span = self.span();
                self.advance();
                tokens.push(mk(TokenKind::RBraceD, span));
                continue;
            }

            let tok = self.next_tag_token()?;
            tokens.push(tok);
        }
    }

    /// Lex a `{! ... !}` comment: the caller consumed `{!`; everything up to
    /// the closing `!}` becomes one `CommentBody` token.
    fn lex_comment(&mut self, tokens: &mut Vec<Token>) -> Result<()> {
        let body_span = self.span();
        let mut body = String::new();
        loop {
            if self.at_end() {
                return Err(Error::LexError {
                    message: "Unclosed comment — expected !}".to_string(),
                    span: self.span(),
                });
            }
            if self.peek() == Some('!') && self.peek_at(1) == Some('}') {
                let close_span = self.span();
                // Consume the two chars of "!}".
                self.advance();
                self.advance();
                tokens.push(mk(TokenKind::CommentBody(body), body_span));
                tokens.push(mk(TokenKind::CommentClose, close_span));
                return Ok(());
            }
            body.push(self.advance().unwrap());
        }
    }

    /// Skip ASCII whitespace (space, tab, newline, carriage return).
    fn skip_ws(&mut self) {
        while matches!(
            self.peek(),
            Some(' ') | Some('\t') | Some('\n') | Some('\r')
        ) {
            self.advance();
        }
    }

    /// Lex one token inside a tag, dispatching on its first character.
    /// The caller guarantees at least one character remains.
    fn next_tag_token(&mut self) -> Result<Token> {
        let span = self.span();
        let c = self.peek().unwrap();

        match c {
            // Strings may use either quote style.
            '"' | '\'' => {
                let s = self.lex_string(c)?;
                Ok(mk(TokenKind::StringLit(s), span))
            }
            '0'..='9' => {
                let kind = self.lex_number()?;
                Ok(mk(kind, span))
            }
            'a'..='z' | 'A'..='Z' | '_' => {
                let name = self.lex_ident();
                let kind = keyword_or_ident(name);
                Ok(mk(kind, span))
            }
            // Two-character operators are resolved by one-char lookahead.
            '|' => {
                self.advance();
                let kind = if self.advance_if('|') {
                    TokenKind::Or
                } else {
                    TokenKind::Pipe
                };
                Ok(mk(kind, span))
            }
            '&' => {
                self.advance();
                if self.advance_if('&') {
                    Ok(mk(TokenKind::And, span))
                } else {
                    Err(Error::LexError {
                        message: "Expected '&&' — lone '&' is not valid".to_string(),
                        span,
                    })
                }
            }
            '?' => {
                self.advance();
                let kind = if self.advance_if('?') {
                    TokenKind::NullCoalesce
                } else {
                    TokenKind::Question
                };
                Ok(mk(kind, span))
            }
            ':' => {
                self.advance();
                Ok(mk(TokenKind::Colon, span))
            }
            '=' => {
                self.advance();
                let kind = if self.advance_if('=') {
                    TokenKind::Eq
                } else {
                    TokenKind::Assign
                };
                Ok(mk(kind, span))
            }
            '!' => {
                self.advance();
                let kind = if self.advance_if('=') {
                    TokenKind::Neq
                } else {
                    TokenKind::Bang
                };
                Ok(mk(kind, span))
            }
            '<' => {
                self.advance();
                let kind = if self.advance_if('=') {
                    TokenKind::Lte
                } else {
                    TokenKind::Lt
                };
                Ok(mk(kind, span))
            }
            '>' => {
                self.advance();
                let kind = if self.advance_if('=') {
                    TokenKind::Gte
                } else {
                    TokenKind::Gt
                };
                Ok(mk(kind, span))
            }
            '+' => {
                self.advance();
                Ok(mk(TokenKind::Add, span))
            }
            '-' => {
                self.advance();
                Ok(mk(TokenKind::Sub, span))
            }
            '*' => {
                self.advance();
                Ok(mk(TokenKind::Mul, span))
            }
            '/' => {
                self.advance();
                Ok(mk(TokenKind::Div, span))
            }
            '%' => {
                self.advance();
                Ok(mk(TokenKind::Mod, span))
            }
            '.' => {
                self.advance();
                Ok(mk(TokenKind::Dot, span))
            }
            '(' => {
                self.advance();
                Ok(mk(TokenKind::LParen, span))
            }
            ')' => {
                self.advance();
                Ok(mk(TokenKind::RParen, span))
            }
            '[' => {
                self.advance();
                Ok(mk(TokenKind::LBracket, span))
            }
            ']' => {
                self.advance();
                Ok(mk(TokenKind::RBracket, span))
            }
            ',' => {
                self.advance();
                Ok(mk(TokenKind::Comma, span))
            }
            other => Err(Error::LexError {
                message: format!("Unexpected character '{}' inside tag", other),
                span,
            }),
        }
    }

    /// Consume an identifier: ASCII letters, digits, and underscores.
    /// (The caller has already checked the first char is not a digit.)
    fn lex_ident(&mut self) -> String {
        let mut s = String::new();
        while matches!(
            self.peek(),
            Some('a'..='z') | Some('A'..='Z') | Some('0'..='9') | Some('_')
        ) {
            s.push(self.advance().unwrap());
        }
        s
    }

    /// Lex a string literal delimited by `quote` (the caller peeked it;
    /// it is consumed here). Supports the escapes \" \' \\ \n \r \t \0 and
    /// \u{HEX}; any other escape is an error.
    fn lex_string(&mut self, quote: char) -> Result<String> {
        self.advance(); // consume the opening quote
        let mut s = String::new();
        loop {
            match self.advance() {
                None => {
                    return Err(Error::LexError {
                        message: "Unterminated string literal".to_string(),
                        span: self.span(),
                    });
                }
                Some(c) if c == quote => break,
                Some('\\') => {
                    // Span of the character following the backslash, used
                    // for escape-related errors.
                    let esc_span = self.span();
                    match self.advance() {
                        Some('"') => s.push('"'),
                        Some('\'') => s.push('\''),
                        Some('\\') => s.push('\\'),
                        Some('n') => s.push('\n'),
                        Some('r') => s.push('\r'),
                        Some('t') => s.push('\t'),
                        Some('0') => s.push('\0'),
                        Some('u') => {
                            if !self.advance_if('{') {
                                return Err(Error::LexError {
                                    message: "Expected '{' after \\u".to_string(),
                                    span: self.span(),
                                });
                            }
                            let mut hex = String::new();
                            while matches!(
                                self.peek(),
                                Some('0'..='9') | Some('a'..='f') | Some('A'..='F')
                            ) {
                                hex.push(self.advance().unwrap());
                            }
                            if !self.advance_if('}') {
                                return Err(Error::LexError {
                                    message: "Expected '}' after unicode escape".to_string(),
                                    span: self.span(),
                                });
                            }
                            // Empty or oversized hex fails the parse; values
                            // outside Unicode scalar range (e.g. surrogates)
                            // fail char::from_u32.
                            let code =
                                u32::from_str_radix(&hex, 16).map_err(|_| Error::LexError {
                                    message: format!("Invalid unicode escape \\u{{{}}}", hex),
                                    span: esc_span.clone(),
                                })?;
                            s.push(char::from_u32(code).ok_or(Error::LexError {
                                message: format!("Invalid unicode codepoint U+{:04X}", code),
                                span: esc_span,
                            })?);
                        }
                        Some(c) => {
                            return Err(Error::LexError {
                                message: format!("Unknown escape sequence '\\{}'", c),
                                span: esc_span,
                            });
                        }
                        None => {
                            return Err(Error::LexError {
                                message: "Unterminated escape sequence".to_string(),
                                span: esc_span,
                            });
                        }
                    }
                }
                Some(c) => s.push(c),
            }
        }
        Ok(s)
    }

    /// Lex a numeric literal starting at a digit. A `.` begins a fractional
    /// part only when followed by a digit (so `1.foo` lexes as int, Dot,
    /// ident). An exponent (`e`/`E` with optional sign) is only recognized
    /// after a fractional part — NOTE(review): `1e5` therefore lexes as the
    /// integer 1 followed by the identifier `e5`; confirm this matches the
    /// template language spec.
    fn lex_number(&mut self) -> Result<TokenKind> {
        let mut s = String::new();
        while matches!(self.peek(), Some('0'..='9')) {
            s.push(self.advance().unwrap());
        }
        if self.peek() == Some('.') && matches!(self.peek_at(1), Some('0'..='9')) {
            s.push(self.advance().unwrap()); // the '.'
            while matches!(self.peek(), Some('0'..='9')) {
                s.push(self.advance().unwrap());
            }
            if matches!(self.peek(), Some('e') | Some('E')) {
                s.push(self.advance().unwrap());
                if matches!(self.peek(), Some('+') | Some('-')) {
                    s.push(self.advance().unwrap());
                }
                while matches!(self.peek(), Some('0'..='9')) {
                    s.push(self.advance().unwrap());
                }
            }
            let span = self.span();
            let f: f64 = s.parse().map_err(|_| Error::LexError {
                message: format!("Invalid float literal '{}'", s),
                span,
            })?;
            Ok(TokenKind::FloatLit(f))
        } else {
            let span = self.span();
            // Parse can still fail on overflow of i64.
            let i: i64 = s.parse().map_err(|_| Error::LexError {
                message: format!("Invalid integer literal '{}'", s),
                span,
            })?;
            Ok(TokenKind::IntLit(i))
        }
    }
}
725
726fn mk(kind: TokenKind, span: Span) -> Token {
727 Token { kind, span }
728}
729
730fn keyword_or_ident(s: String) -> TokenKind {
731 match s.as_str() {
732 "if" => TokenKind::KwIf,
733 "else" => TokenKind::KwElse,
734 "each" => TokenKind::KwEach,
735 "as" => TokenKind::KwAs,
736 "snippet" => TokenKind::KwSnippet,
737 "raw" => TokenKind::KwRaw,
738 "render" => TokenKind::KwRender,
739 "const" => TokenKind::KwConst,
740 "include" => TokenKind::KwInclude,
741 "debug" => TokenKind::KwDebug,
742 "is" => TokenKind::KwIs,
743 "not" => TokenKind::KwNot,
744 "in" => TokenKind::KwIn,
745 "true" => TokenKind::True,
746 "false" => TokenKind::False,
747 "null" => TokenKind::Null,
748 _ => TokenKind::Ident(s),
749 }
750}
751
752pub fn tokenize(src: &str) -> Result<Vec<Token>> {
754 Lexer::new(src).tokenize()
755}