// oxidize_pdf/templates/parser.rs
use regex::Regex;
use std::collections::HashSet;

use super::error::{TemplateError, TemplateResult};

/// One `{{variable}}` occurrence located inside a template string.
///
/// Values are plain data: the parser in this module fills them from a regex
/// match, but nothing here validates the fields.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Placeholder {
    /// The complete matched text, braces and inner whitespace included
    /// (for example `{{ name }}`).
    pub full_text: String,
    /// The variable name found between the braces (for example `name` or
    /// `user.age`).
    pub variable_name: String,
    /// Byte offset in the template at which `full_text` begins.
    pub start: usize,
    /// Byte offset in the template just past the end of `full_text`, so that
    /// `&template[start..end]` yields `full_text`.
    pub end: usize,
}

impl Placeholder {
    /// Builds a placeholder from its four components, stored verbatim.
    ///
    /// No consistency check is performed between `full_text`, `start` and
    /// `end`; callers are expected to pass values taken from the same match.
    pub fn new(full_text: String, variable_name: String, start: usize, end: usize) -> Self {
        Self {
            full_text,
            variable_name,
            start,
            end,
        }
    }
}

31pub struct TemplateParser {
33 placeholder_regex: Regex,
35}
36
37impl TemplateParser {
38 pub fn new() -> Self {
43 let placeholder_regex = Regex::new(r"\{\{\s*([a-zA-Z_][a-zA-Z0-9_.]*)\s*\}\}")
52 .or_else(|_| Regex::new(r"\{\{([^}]+)\}\}"))
53 .or_else(|_| Regex::new(r"[^\x00-\x7F]+")) .unwrap_or_else(|_| {
55 unreachable!("All regex patterns failed to compile - regex engine is broken")
58 });
59
60 Self { placeholder_regex }
61 }
62
63 pub fn parse(&self, template: &str) -> TemplateResult<Vec<Placeholder>> {
65 let mut placeholders = Vec::new();
66
67 for captures in self.placeholder_regex.captures_iter(template) {
68 let full_match = &captures[0];
70 let variable_name_str = &captures[1];
71
72 let full_text = full_match.to_string();
73 let variable_name = variable_name_str.to_string();
74 let start = captures.get(0).map(|m| m.start()).unwrap_or(0);
75 let end = captures.get(0).map(|m| m.end()).unwrap_or(0);
76
77 self.validate_variable_name(&variable_name)?;
79
80 placeholders.push(Placeholder::new(full_text, variable_name, start, end));
81 }
82
83 self.check_for_invalid_patterns(template)?;
85
86 self.check_for_invalid_variable_names_in_braces(template)?;
88
89 Ok(placeholders)
90 }
91
92 pub fn get_variable_names(&self, template: &str) -> TemplateResult<Vec<String>> {
94 let placeholders = self.parse(template)?;
95 let mut names: HashSet<String> = HashSet::new();
96
97 for placeholder in placeholders {
98 names.insert(placeholder.variable_name);
99 }
100
101 let mut result: Vec<String> = names.into_iter().collect();
102 result.sort();
103 Ok(result)
104 }
105
106 fn validate_variable_name(&self, name: &str) -> TemplateResult<()> {
108 if name.is_empty() {
109 return Err(TemplateError::InvalidVariableName(name.to_string()));
110 }
111
112 if let Some(first_char) = name.chars().next() {
114 if !first_char.is_alphabetic() && first_char != '_' {
115 return Err(TemplateError::InvalidVariableName(name.to_string()));
116 }
117 } else {
118 return Err(TemplateError::InvalidVariableName(name.to_string()));
120 }
121
122 for ch in name.chars() {
124 if !ch.is_alphanumeric() && ch != '_' && ch != '.' {
125 return Err(TemplateError::InvalidVariableName(name.to_string()));
126 }
127 }
128
129 Ok(())
130 }
131
132 fn check_for_invalid_patterns(&self, template: &str) -> TemplateResult<()> {
134 let empty_placeholder_regex = Regex::new(r"\{\{\s*\}\}")?;
136 if let Some(empty_match) = empty_placeholder_regex.find(template) {
137 return Err(TemplateError::InvalidPlaceholder(format!(
138 "Empty placeholder found at position {}: '{}'",
139 empty_match.start(),
140 empty_match.as_str()
141 )));
142 }
143
144 let malformed_regex = Regex::new(r"\{\{\{+|\}\}\}+")?;
146 if let Some(malformed_match) = malformed_regex.find(template) {
147 return Err(TemplateError::InvalidPlaceholder(format!(
148 "Malformed placeholder at position {}: '{}' - use exactly two braces",
149 malformed_match.start(),
150 malformed_match.as_str()
151 )));
152 }
153
154 let all_double_braces_regex = Regex::new(r"\{\{\s*[^}]*\s*\}\}")?;
157 let cleaned = all_double_braces_regex.replace_all(template, "");
158
159 let single_brace_regex = Regex::new(r"[{}]")?;
161 if let Some(invalid_match) = single_brace_regex.find(&cleaned) {
162 let position = self.find_original_position(&cleaned, invalid_match.start(), template);
164 return Err(TemplateError::InvalidPlaceholder(format!(
165 "Found single brace near position {}: '{}' - did you mean to use double braces {{{{}}}}?",
166 position,
167 invalid_match.as_str()
168 )));
169 }
170
171 Ok(())
172 }
173
174 pub fn has_placeholders(&self, template: &str) -> bool {
176 self.placeholder_regex.is_match(template)
177 }
178
179 pub fn count_placeholders(&self, template: &str) -> usize {
181 self.placeholder_regex.find_iter(template).count()
182 }
183
184 fn find_original_position(&self, _cleaned: &str, cleaned_pos: usize, original: &str) -> usize {
186 cleaned_pos.min(original.len().saturating_sub(1))
189 }
190
191 fn check_for_invalid_variable_names_in_braces(&self, template: &str) -> TemplateResult<()> {
193 let all_double_braces_regex = Regex::new(r"\{\{\s*([^}]*)\s*\}\}")?;
195
196 for captures in all_double_braces_regex.captures_iter(template) {
197 let variable_name = captures[1].trim();
199
200 if self.validate_variable_name(variable_name).is_err() {
202 return Err(TemplateError::InvalidVariableName(
203 variable_name.to_string(),
204 ));
205 }
206 }
207
208 Ok(())
209 }
210}
211
212impl Default for TemplateParser {
213 fn default() -> Self {
214 Self::new()
215 }
216}
217
#[cfg(test)]
mod tests {
    use super::*;

    /// Two plain placeholders are returned in order with their text and the
    /// first one's start offset.
    #[test]
    fn test_basic_placeholder_parsing() {
        let parsed = TemplateParser::new()
            .parse("Hello {{name}}, your total is {{total}}.")
            .unwrap();
        assert_eq!(parsed.len(), 2);

        let first = &parsed[0];
        assert_eq!(first.variable_name, "name");
        assert_eq!(first.full_text, "{{name}}");
        assert_eq!(first.start, 6);

        let second = &parsed[1];
        assert_eq!(second.variable_name, "total");
        assert_eq!(second.full_text, "{{total}}");
    }

    /// Dotted names such as `user.name` are accepted as a single variable.
    #[test]
    fn test_dot_notation_variables() {
        let parsed = TemplateParser::new()
            .parse("User: {{user.name}} ({{user.age}} years old)")
            .unwrap();

        let names: Vec<&str> = parsed.iter().map(|p| p.variable_name.as_str()).collect();
        assert_eq!(names, ["user.name", "user.age"]);
    }

    /// Whitespace inside the braces is not part of the variable name.
    #[test]
    fn test_whitespace_handling() {
        let parsed = TemplateParser::new()
            .parse("{{ name }} and {{ total }}")
            .unwrap();

        let names: Vec<&str> = parsed.iter().map(|p| p.variable_name.as_str()).collect();
        assert_eq!(names, ["name", "total"]);
    }

    /// Variable names come back de-duplicated and sorted.
    #[test]
    fn test_get_variable_names() {
        let names = TemplateParser::new()
            .get_variable_names("{{name}} {{total}} {{name}} {{user.age}}")
            .unwrap();
        assert_eq!(names, vec!["name", "total", "user.age"]);
    }

    /// A name starting with a digit is reported as an invalid variable name.
    #[test]
    fn test_invalid_variable_names() {
        let outcome = TemplateParser::new().parse("{{123invalid}}");
        assert!(matches!(
            outcome,
            Err(TemplateError::InvalidVariableName(_))
        ));
    }

    /// Single braces, empty placeholders and triple braces are all reported
    /// as invalid placeholders.
    #[test]
    fn test_invalid_placeholder_patterns() {
        let parser = TemplateParser::new();
        let broken_templates = ["Hello {name}", "Hello {{}}", "Hello {{{name}}}"];

        for template in broken_templates.iter() {
            let outcome = parser.parse(template);
            assert!(
                matches!(outcome, Err(TemplateError::InvalidPlaceholder(_))),
                "expected InvalidPlaceholder for {:?}",
                template
            );
        }
    }

    /// `has_placeholders` distinguishes templates with and without placeholders.
    #[test]
    fn test_has_placeholders() {
        let parser = TemplateParser::new();
        assert!(parser.has_placeholders("Hello {{name}}"));
        assert!(!parser.has_placeholders("Hello world"));
    }

    /// `count_placeholders` counts occurrences, duplicates included.
    #[test]
    fn test_count_placeholders() {
        let parser = TemplateParser::new();
        let cases = [
            ("{{a}} {{b}} {{c}}", 3),
            ("No placeholders here", 0),
            ("{{duplicate}} {{duplicate}}", 2),
        ];

        for &(template, expected) in cases.iter() {
            assert_eq!(parser.count_placeholders(template), expected);
        }
    }

    /// `start`/`end` are offsets that slice the placeholder text back out of
    /// the template.
    #[test]
    fn test_placeholder_positions() {
        let template = "Start {{var1}} middle {{var2}} end";
        let placeholders = TemplateParser::new().parse(template).unwrap();

        let expected = [(6, 14, "{{var1}}"), (22, 30, "{{var2}}")];
        for (index, &(start, end, text)) in expected.iter().enumerate() {
            let placeholder = &placeholders[index];
            assert_eq!(placeholder.start, start);
            assert_eq!(placeholder.end, end);
            assert_eq!(&template[placeholder.start..placeholder.end], text);
        }
    }
}