rusty_handlebars_parser/expression_tokenizer.rs
1//! Handlebars expression tokenization
2//!
3//! This module provides functionality for tokenizing Handlebars expressions into their component parts.
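//! It handles various token types including:
//! - Literals: Double-quoted strings and other values that are not valid variable names (e.g. numbers)
//! - Variables: Names and paths such as `name`, `user.age` or `../parent`
//! - Private variables: Variables prefixed with @ (e.g. @index)
//! - Sub-expressions: Parenthesized expressions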
8//!
9//! # Token Types
10//!
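//! ## Literals
//! Double-quoted strings, and values that do not start with a letter or an underscore:
//! ```text
//! "hello world"
//! 42
//! ```
//!
//! ## Variables
//! Names and paths that start with a letter, an underscore or `../`:
//! ```text
//! name
//! user.age
//! ```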
17//!
//! ## Private Variables
//! Variables prefixed with @ that have special meaning. The token value does not
//! include the leading `@`:
//! ```text
//! @index
//! @key
//! @value
//! ```
25//!
//! ## Sub-expressions
//! Parenthesized expressions that are evaluated first. The token value is the text
//! between the parentheses:
//! ```text
//! (helper arg1 arg2)
//! (math.add 1 2)
//! ```
32//!
33//! # Examples
34//!
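//! ```rust
//! use rusty_handlebars_parser::expression_tokenizer::{Token, TokenType};
//!
//! let src = "user.name (helper arg) @index";
//! let token = Token::first(src).unwrap().unwrap();
//! assert_eq!(token.value, "user.name");
//! assert!(matches!(token.token_type, TokenType::Variable));
//!
//! let token = token.next().unwrap().unwrap();
//! assert_eq!(token.value, "helper arg");
//! assert!(matches!(token.token_type, TokenType::SubExpression(_)));
//! ```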
43
44use crate::error::{rcap, ParseError, Result};
45
/// Types of tokens that can be parsed from an expression
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenType<'a> {
    /// A parenthesized sub-expression; carries the source text from the opening
    /// parenthesis up to (but not including) the matching closing parenthesis
    SubExpression(&'a str),
    /// A private variable prefixed with @
    PrivateVariable,
    /// A name or path that starts with a letter, an underscore or `../`
    Variable,
    /// A double-quoted string, or any other value that is not a valid variable name
    Literal,
}

/// A token parsed from an expression
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token<'a> {
    /// The type of token
    pub token_type: TokenType<'a>,
    /// The token's value: the name without the leading `@` for private variables,
    /// the text between the parentheses for sub-expressions, and the token text
    /// as written (quotes included for strings) otherwise
    pub value: &'a str,
    /// The remaining text after this token, with leading whitespace removed
    pub tail: &'a str,
}
68
69/// Finds the closing parenthesis for a sub-expression
70fn find_closing(src: &str) -> Result<usize> {
71 let mut count = 1;
72 let rest = &src[1..];
73 for (i, c) in rest.char_indices() {
74 match c {
75 '(' => count += 1,
76 ')' => count -= 1,
77 _ => ()
78 }
79 if count == 0 {
80 return Ok(i + 1);
81 }
82 }
83 Err(ParseError{ message: format!("unmatched brackets near {}", rcap(src))})
84}
85
86fn find_end_of_string(src: &str) -> Result<usize> {
87 let cliped = &src[1..];
88 let mut escaped = false;
89 for (i, c) in cliped.char_indices() {
90 match c {
91 '\\' => escaped = !escaped,
92 '"' => {
93 if !escaped {
94 return Ok(i + 2);
95 }
96 }
97 _ => ()
98 }
99 }
100 Err(ParseError{ message: format!("unterminated string near {}", rcap(src))})
101}
102
/// Returns the byte length of the token at the start of `src`
///
/// The token stops at the first space, `(`, newline, carriage return or tab.
/// When none of those characters occurs, the token spans all of `src`.
fn find_end(src: &str) -> usize {
    src.find(|c: char| matches!(c, ' ' | '(' | '\n' | '\r' | '\t'))
        .unwrap_or(src.len())
}
112
/// Reports whether `src` cannot be a variable name
///
/// A name is valid when it starts with `../`, a letter or an underscore. An
/// empty string is not reported as invalid.
fn invalid_variable_name(src: &str) -> bool {
    // ../ is valid for relative paths
    if src.starts_with("../") {
        return false;
    }
    match src.chars().next() {
        Some(first) => !first.is_alphabetic() && first != '_',
        None => false,
    }
}
119
120/// Parses a single token from the input string
121fn parse<'a>(src: &'a str) -> Result<Option<Token<'a>>> {
122 Ok(match src.chars().next() {
123 Some('@') => {
124 let end = find_end(src);
125 Some(Token {
126 token_type: TokenType::PrivateVariable,
127 value: &src[1..end],
128 tail: &src[end..].trim_start()
129 })
130 },
131 Some('(') => {
132 let end = find_closing(&src)?;
133 Some(Token {
134 token_type: TokenType::SubExpression(&src[..end]),
135 value: &src[1..end],
136 tail: &src[end + 1..].trim_start()
137 })
138 },
139 None => None,
140 _ => {
141 let (end, token_type) = if src.starts_with('"') {
142 (find_end_of_string(src)?, TokenType::Literal)
143 } else {
144 (find_end(src), if invalid_variable_name(src) { TokenType::Literal } else { TokenType::Variable })
145 };
146 Some(Token {
147 token_type,
148 value: &src[..end],
149 tail: &src[end..].trim_start()
150 })
151 }
152 })
153}
154
155impl<'a> Token<'a> {
156 /// Parses the first token from a string
157 pub fn first(src: &'a str) -> Result<Option<Self>> {
158 parse(src.trim())
159 }
160
161 /// Parses the next token after this one
162 pub fn next(&self) -> Result<Option<Self>> {
163 parse(self.tail)
164 }
165}
166