Skip to main content

mst_parser/
parser.rs

1/* src/parser.rs */
2
3use alloc::string::String;
4use alloc::vec::Vec;
5use crate::ast::{Node, Limits};
6use crate::error::Error;
7
/// Mutable bookkeeping for a single parse run.
///
/// Pairs each running counter with the ceiling it is checked against.
struct ParserState {
    /// Ceiling for `node_count`.
    max_nodes: usize,
    /// Ceiling for `depth`.
    max_depth: usize,
    /// Number of nodes counted so far.
    node_count: usize,
    /// Current variable nesting level.
    depth: usize,
}
15
16impl ParserState {
17    fn new(limits: &Limits) -> Self {
18        Self {
19            node_count: 0,
20            depth: 0,
21            max_nodes: limits.max_nodes,
22            max_depth: limits.max_depth,
23        }
24    }
25
26    fn inc_node(&mut self, offset: usize) -> Result<(), Error> {
27        if self.node_count >= self.max_nodes {
28            return Err(Error::NodeLimitExceeded { limit: self.max_nodes, offset });
29        }
30        self.node_count += 1;
31        Ok(())
32    }
33
34    fn enter_depth(&mut self, offset: usize) -> Result<(), Error> {
35        if self.depth >= self.max_depth {
36            return Err(Error::DepthExceeded { limit: self.max_depth, offset });
37        }
38        self.depth += 1;
39        Ok(())
40    }
41
42    fn exit_depth(&mut self, offset: usize) -> Result<(), Error> {
43        self.depth = self.depth.checked_sub(1)
44            .ok_or(Error::UnbalancedTag { offset })?;
45        Ok(())
46    }
47}
48
49/// A configured template parser.
50#[derive(Debug, Clone, Default)]
51pub struct Parser {
52    limits: Limits,
53}
54
55impl Parser {
56    /// Creates a new parser with the given limits.
57    #[must_use]
58    pub fn new(limits: Limits) -> Self {
59        Self { limits }
60    }
61
62    /// Parses a template string into AST nodes.
63    pub fn parse(&self, input: &str) -> Result<Vec<Node>, Error> {
64        parse_inner(input, &self.limits)
65    }
66}
67
68/// Convenience function that parses with default limits.
69pub fn parse(input: &str) -> Result<Vec<Node>, Error> {
70    Parser::default().parse(input)
71}
72
73fn parse_inner(input: &str, limits: &Limits) -> Result<Vec<Node>, Error> {
74    let mut state = ParserState::new(limits);
75    let mut chars = input.char_indices().peekable();
76    parse_recursive(&mut chars, input, &mut state, None)
77}
78
79/// Internal recursive parser.
80///
81/// `var_open_offset`: Some(idx) if we are currently parsing content inside `{{ ... }}` started at `idx`.
82fn parse_recursive(
83    chars: &mut core::iter::Peekable<core::str::CharIndices<'_>>,
84    original_input: &str,
85    state: &mut ParserState,
86    var_open_offset: Option<usize>,
87) -> Result<Vec<Node>, Error> {
88    let mut nodes = Vec::new();
89    let mut current_text_start = None;
90
91    while let Some((idx, ch)) = chars.next() {
92        // Check for '{{' start sequence
93        if ch == '{' {
94            if let Some(&(_, '{')) = chars.peek() {
95                // Found "{{", start of a variable
96                
97                // 1. Flush any preceding text
98                if let Some(start) = current_text_start {
99                    if start < idx {
100                        let text = &original_input[start..idx];
101                        state.inc_node(start)?;
102                        nodes.push(Node::Text(String::from(text)));
103                        #[cfg(feature = "tracing")]
104                        tracing::trace!(text = text, "parsed text node");
105                    }
106                    current_text_start = None;
107                }
108
109                // 2. Consume the second '{'
110                chars.next();
111
112                // 3. Parse variable content recursively
113                state.inc_node(idx)?; // Count the Variable node itself
114                state.enter_depth(idx)?;
115                
116                #[cfg(feature = "tracing")]
117                tracing::debug!(depth = state.depth, "entering variable");
118
119                let parts = parse_recursive(chars, original_input, state, Some(idx))?;
120                
121                #[cfg(feature = "tracing")]
122                tracing::debug!(depth = state.depth, "exiting variable");
123                
124                state.exit_depth(idx)?;
125                nodes.push(Node::Variable { parts });
126
127                continue;
128            }
129        }
130
131        // Check for '}}' end sequence
132        if let Some(open_offset) = var_open_offset {
133            if ch == '}' {
134                if let Some(&(_, '}')) = chars.peek() {
135                    // Found "}}", end of current variable
136                    
137                    // 1. Flush any preceding text
138                    if let Some(start) = current_text_start {
139                        if start < idx {
140                            let text = &original_input[start..idx];
141                            state.inc_node(start)?;
142                            nodes.push(Node::Text(String::from(text)));
143                            #[cfg(feature = "tracing")]
144                            tracing::trace!(text = text, "parsed text node");
145                        }
146                        // current_text_start = None; // Not needed before return
147                    }
148
149                    // 2. Consume the second '}'
150                    chars.next();
151                    
152                    // Check for empty variable
153                    if nodes.is_empty() {
154                        return Err(Error::EmptyVariable { offset: open_offset });
155                    }
156
157                    // Return what we collected for this variable
158                    return Ok(nodes);
159                }
160            }
161        }
162
163        // Normal character, track text start
164        if current_text_start.is_none() {
165            current_text_start = Some(idx);
166        }
167    }
168
169    // End of input reached
170    if let Some(open_offset) = var_open_offset {
171        // If we were expecting '}}' but hit EOF, that's an error
172        return Err(Error::UnclosedVariable { offset: open_offset });
173    }
174
175    // Flush remaining text at EOF for top-level
176    if let Some(start) = current_text_start {
177        let end_idx = original_input.len();
178        if start < end_idx {
179            let text = &original_input[start..end_idx];
180            state.inc_node(start)?;
181            nodes.push(Node::Text(String::from(text)));
182            #[cfg(feature = "tracing")]
183            tracing::trace!(text = text, "parsed text node");
184        }
185    }
186
187    Ok(nodes)
188}
189
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec;

    /// Brace-free input comes back as a single text node.
    #[test]
    fn test_plain_text() {
        let nodes = parse("Hello World").unwrap();
        let expected = vec![Node::Text("Hello World".into())];
        assert_eq!(nodes, expected);
    }

    /// Text, one variable, then trailing text.
    #[test]
    fn test_simple_variable() {
        let nodes = parse("Hello {{name}}!").unwrap();
        let expected = vec![
            Node::Text("Hello ".into()),
            Node::Variable {
                parts: vec![Node::Text("name".into())],
            },
            Node::Text("!".into()),
        ];
        assert_eq!(nodes, expected);
    }

    /// A variable nested in another: Variable(Text("key."), Variable(Text("sub"))).
    #[test]
    fn test_nested_variable() {
        let nodes = parse("{{key.{{sub}}}}").unwrap();

        let outer = match &nodes[0] {
            Node::Variable { parts } => parts,
            _ => panic!("Expected outer variable"),
        };
        assert_eq!(outer.len(), 2);
        assert_eq!(outer[0], Node::Text("key.".into()));

        let inner = match &outer[1] {
            Node::Variable { parts } => parts,
            _ => panic!("Expected inner variable"),
        };
        assert_eq!(inner[0], Node::Text("sub".into()));
    }

    /// Input ending inside a variable reports where that variable opened.
    #[test]
    fn test_unclosed_variable() {
        let err = parse("Hello {{name").unwrap_err();
        if let Error::UnclosedVariable { offset } = err {
            assert_eq!(offset, 6);
        } else {
            panic!("Unexpected error: {:?}", err);
        }
    }

    /// With `max_depth: 1` the second, nested `{{` (at offset 3) is rejected.
    #[test]
    fn test_depth_limit() {
        let parser = Parser::new(Limits { max_depth: 1, max_nodes: 100 });
        let err = parser.parse("{{a{{b}}}}").unwrap_err();
        if let Error::DepthExceeded { limit, offset } = err {
            assert_eq!(limit, 1);
            assert_eq!(offset, 3);
        } else {
            panic!("Unexpected error: {:?}", err);
        }
    }

    /// "abc" + the variable use up both allowed nodes; the inner "d"
    /// (starting at offset 5) is the third node and is rejected.
    #[test]
    fn test_node_limit() {
        let parser = Parser::new(Limits { max_depth: 10, max_nodes: 2 });
        let err = parser.parse("abc{{d}}").unwrap_err();
        if let Error::NodeLimitExceeded { limit, offset } = err {
            assert_eq!(limit, 2);
            assert_eq!(offset, 5);
        } else {
            panic!("Unexpected error: {:?}", err);
        }
    }

    /// In "{{{}}" the first "{{" opens a variable and the third "{" is plain
    /// text inside it, closed by the following "}}".
    #[test]
    fn test_consecutive_braces() {
        let nodes = parse("{{{}}").unwrap();
        if let Node::Variable { parts } = &nodes[0] {
            assert_eq!(parts[0], Node::Text("{".into()));
        } else {
            panic!("Expected variable");
        }
    }

    /// "{{}}" has no content and is rejected at the opening offset.
    #[test]
    fn test_empty_variable() {
        let err = parse("{{}}").unwrap_err();
        if let Error::EmptyVariable { offset } = err {
            assert_eq!(offset, 0);
        } else {
            panic!("Unexpected error: {:?}", err);
        }
    }

    /// Outside any variable, "}}" is ordinary text.
    #[test]
    fn test_top_level_closing_braces() {
        let nodes = parse("hello}}world").unwrap();
        let expected = vec![Node::Text("hello}}world".into())];
        assert_eq!(nodes, expected);
    }
}
311}