Skip to main content

mst_parser/
parser.rs

1/* src/parser.rs */
2
3use crate::ast::{Limits, Node};
4use crate::error::Error;
5use alloc::string::String;
6use alloc::vec::Vec;
7
/// Bookkeeping for a single parse run: the configured ceilings together with
/// how much of each budget has been used so far.
struct ParserState {
	/// Ceiling that `node_count` is checked against.
	max_nodes: usize,
	/// Ceiling that `depth` is checked against.
	max_depth: usize,
	/// Number of nodes accounted for so far.
	node_count: usize,
	/// Current nesting level of open variables.
	depth: usize,
}
15
16impl ParserState {
17	fn new(limits: &Limits) -> Self {
18		Self {
19			node_count: 0,
20			depth: 0,
21			max_nodes: limits.max_nodes,
22			max_depth: limits.max_depth,
23		}
24	}
25
26	fn inc_node(&mut self, offset: usize) -> Result<(), Error> {
27		if self.node_count >= self.max_nodes {
28			return Err(Error::NodeLimitExceeded {
29				limit: self.max_nodes,
30				offset,
31			});
32		}
33		self.node_count += 1;
34		Ok(())
35	}
36
37	fn enter_depth(&mut self, offset: usize) -> Result<(), Error> {
38		if self.depth >= self.max_depth {
39			return Err(Error::DepthExceeded {
40				limit: self.max_depth,
41				offset,
42			});
43		}
44		self.depth += 1;
45		Ok(())
46	}
47
48	fn exit_depth(&mut self, offset: usize) -> Result<(), Error> {
49		self.depth = self
50			.depth
51			.checked_sub(1)
52			.ok_or(Error::UnbalancedTag { offset })?;
53		Ok(())
54	}
55}
56
57/// A configured template parser.
58#[derive(Debug, Clone, Default)]
59pub struct Parser {
60	limits: Limits,
61}
62
63impl Parser {
64	/// Creates a new parser with the given limits.
65	#[must_use]
66	pub fn new(limits: Limits) -> Self {
67		Self { limits }
68	}
69
70	/// Parses a template string into AST nodes.
71	pub fn parse(&self, input: &str) -> Result<Vec<Node>, Error> {
72		parse_inner(input, &self.limits)
73	}
74}
75
76/// Convenience function that parses with default limits.
77pub fn parse(input: &str) -> Result<Vec<Node>, Error> {
78	Parser::default().parse(input)
79}
80
81fn parse_inner(input: &str, limits: &Limits) -> Result<Vec<Node>, Error> {
82	let mut state = ParserState::new(limits);
83	let mut chars = input.char_indices().peekable();
84	parse_recursive(&mut chars, input, &mut state, None)
85}
86
87/// Internal recursive parser.
88///
89/// `var_open_offset`: Some(idx) if we are currently parsing content inside `{{ ... }}` started at `idx`.
90fn parse_recursive(
91	chars: &mut core::iter::Peekable<core::str::CharIndices<'_>>,
92	original_input: &str,
93	state: &mut ParserState,
94	var_open_offset: Option<usize>,
95) -> Result<Vec<Node>, Error> {
96	let mut nodes = Vec::new();
97	let mut current_text_start = None;
98
99	while let Some((idx, ch)) = chars.next() {
100		// Check for '{{' start sequence
101		if ch == '{' {
102			if let Some(&(_, '{')) = chars.peek() {
103				// Found "{{", start of a variable
104
105				// 1. Flush any preceding text
106				if let Some(start) = current_text_start {
107					if start < idx {
108						let text = &original_input[start..idx];
109						state.inc_node(start)?;
110						nodes.push(Node::Text(String::from(text)));
111						#[cfg(feature = "tracing")]
112						tracing::trace!(text = text, "parsed text node");
113					}
114					current_text_start = None;
115				}
116
117				// 2. Consume the second '{'
118				chars.next();
119
120				// 3. Parse variable content recursively
121				state.inc_node(idx)?; // Count the Variable node itself
122				state.enter_depth(idx)?;
123
124				#[cfg(feature = "tracing")]
125				tracing::debug!(depth = state.depth, "entering variable");
126
127				let parts = parse_recursive(chars, original_input, state, Some(idx))?;
128
129				#[cfg(feature = "tracing")]
130				tracing::debug!(depth = state.depth, "exiting variable");
131
132				state.exit_depth(idx)?;
133				nodes.push(Node::Variable { parts });
134
135				continue;
136			}
137		}
138
139		// Check for '}}' end sequence
140		if let Some(open_offset) = var_open_offset {
141			if ch == '}' {
142				if let Some(&(_, '}')) = chars.peek() {
143					// Found "}}", end of current variable
144
145					// 1. Flush any preceding text
146					if let Some(start) = current_text_start {
147						if start < idx {
148							let text = &original_input[start..idx];
149							state.inc_node(start)?;
150							nodes.push(Node::Text(String::from(text)));
151							#[cfg(feature = "tracing")]
152							tracing::trace!(text = text, "parsed text node");
153						}
154						// current_text_start = None; // Not needed before return
155					}
156
157					// 2. Consume the second '}'
158					chars.next();
159
160					// Check for empty variable
161					if nodes.is_empty() {
162						return Err(Error::EmptyVariable {
163							offset: open_offset,
164						});
165					}
166
167					// Return what we collected for this variable
168					return Ok(nodes);
169				}
170			}
171		}
172
173		// Normal character, track text start
174		if current_text_start.is_none() {
175			current_text_start = Some(idx);
176		}
177	}
178
179	// End of input reached
180	if let Some(open_offset) = var_open_offset {
181		// If we were expecting '}}' but hit EOF, that's an error
182		return Err(Error::UnclosedVariable {
183			offset: open_offset,
184		});
185	}
186
187	// Flush remaining text at EOF for top-level
188	if let Some(start) = current_text_start {
189		let end_idx = original_input.len();
190		if start < end_idx {
191			let text = &original_input[start..end_idx];
192			state.inc_node(start)?;
193			nodes.push(Node::Text(String::from(text)));
194			#[cfg(feature = "tracing")]
195			tracing::trace!(text = text, "parsed text node");
196		}
197	}
198
199	Ok(nodes)
200}
201
#[cfg(test)]
mod tests {
	use super::*;
	use alloc::vec;

	/// Input without any braces becomes a single text node.
	#[test]
	fn test_plain_text() {
		let parsed = parse("Hello World").unwrap();
		let expected = vec![Node::Text("Hello World".into())];
		assert_eq!(parsed, expected);
	}

	/// Text, one variable, then trailing text.
	#[test]
	fn test_simple_variable() {
		let parsed = parse("Hello {{name}}!").unwrap();
		let expected = vec![
			Node::Text("Hello ".into()),
			Node::Variable {
				parts: vec![Node::Text("name".into())],
			},
			Node::Text("!".into()),
		];
		assert_eq!(parsed, expected);
	}

	/// "key.{{sub}}" -> Variable(Text("key."), Variable(Text("sub")))
	#[test]
	fn test_nested_variable() {
		let parsed = parse("{{key.{{sub}}}}").unwrap();

		let outer = match &parsed[0] {
			Node::Variable { parts } => parts,
			Node::Text(_) => panic!("Expected outer variable"),
		};
		assert_eq!(outer.len(), 2);
		assert_eq!(outer[0], Node::Text("key.".into()));

		let inner = match &outer[1] {
			Node::Variable { parts } => parts,
			Node::Text(_) => panic!("Expected inner variable"),
		};
		assert_eq!(inner[0], Node::Text("sub".into()));
	}

	/// A "{{" that never closes reports the offset of the opening braces.
	#[test]
	fn test_unclosed_variable() {
		let err = parse("Hello {{name").unwrap_err();
		if let Error::UnclosedVariable { offset } = &err {
			assert_eq!(*offset, 6);
		} else {
			panic!("Unexpected error: {err:?}");
		}
	}

	/// Depth 0 (root) -> Depth 1 ({{a}}) -> Depth 2 ({{b}}) -> Fail
	#[test]
	fn test_depth_limit() {
		let parser = Parser::new(Limits {
			max_depth: 1,
			max_nodes: 100,
		});
		let err = parser.parse("{{a{{b}}}}").unwrap_err();
		if let Error::DepthExceeded { limit, offset } = &err {
			assert_eq!(*limit, 1);
			assert_eq!(*offset, 3); // second {{ starts at 3
		} else {
			panic!("Unexpected error: {err:?}");
		}
	}

	/// "abc" (1 node) + {{...}} (1 node) + "d" (1 node) = 3 nodes -> Fail
	#[test]
	fn test_node_limit() {
		let parser = Parser::new(Limits {
			max_depth: 10,
			max_nodes: 2,
		});
		let err = parser.parse("abc{{d}}").unwrap_err();
		if let Error::NodeLimitExceeded { limit, offset } = &err {
			assert_eq!(*limit, 2); // Error happens at "d" (node 3)
			assert_eq!(*offset, 5); // "d" starts at 5
		} else {
			panic!("Unexpected error: {err:?}");
		}
	}

	/// "{{{" -> the first "{{" opens a variable; the remaining "{" is text
	/// inside it, and the following "}}" closes the variable.
	#[test]
	fn test_consecutive_braces() {
		let parsed = parse("{{{}}").unwrap();
		let parts = match &parsed[0] {
			Node::Variable { parts } => parts,
			Node::Text(_) => panic!("Expected variable"),
		};
		assert_eq!(parts[0], Node::Text("{".into()));
	}

	/// "{{}}" is rejected, reporting the offset of the opening braces.
	#[test]
	fn test_empty_variable() {
		let err = parse("{{}}").unwrap_err();
		if let Error::EmptyVariable { offset } = &err {
			assert_eq!(*offset, 0);
		} else {
			panic!("Unexpected error: {err:?}");
		}
	}

	/// "}}" outside of any variable is kept as plain text.
	#[test]
	fn test_top_level_closing_braces() {
		let parsed = parse("hello}}world").unwrap();
		let expected = vec![Node::Text("hello}}world".into())];
		assert_eq!(parsed, expected);
	}
}
329}