ralph_workflow/prompts/
template_validator.rs

1//! Template validation and inspection module.
2//!
3//! Provides functionality for validating template syntax, extracting variables,
4//! and checking template integrity.
5
6use std::collections::HashSet;
7
/// Template validation result.
///
/// Built by [`validate_template`], which bundles everything it extracted from
/// the template together with the problems it found.
#[derive(Debug, Clone)]
pub struct ValidationResult {
    /// Whether validation passed (`validate_template` clears this whenever it
    /// records an entry in `errors`; warnings do not affect it)
    pub is_valid: bool,
    /// Variables referenced in the template, in order of appearance
    pub variables: Vec<VariableInfo>,
    /// Partials referenced in the template, in order of appearance
    pub partials: Vec<String>,
    /// Validation errors found (syntax errors first, then missing partials)
    pub errors: Vec<ValidationError>,
    /// Validation warnings found
    pub warnings: Vec<ValidationWarning>,
}
22
/// Information about a variable reference in a template.
///
/// One value is produced per `{{ NAME }}` occurrence, so a variable that is
/// referenced several times yields several entries. Implements `PartialEq`/`Eq`
/// so extraction results can be compared directly (e.g. with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VariableInfo {
    /// Name of the variable
    pub name: String,
    /// Line number where the variable's opening `{{` appears (0-indexed)
    pub line: usize,
    /// Whether the variable has a default value
    pub has_default: bool,
    /// Default value if present (surrounding quotes already removed)
    pub default_value: Option<String>,
}
35
/// Template validation error.
///
/// All `line` fields are 0-indexed. Implements `PartialEq`/`Eq` so callers and
/// tests can compare errors directly instead of pattern-matching each one.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidationError {
    /// Unclosed conditional block
    UnclosedConditional { line: usize },
    /// Unclosed loop block
    UnclosedLoop { line: usize },
    /// Invalid conditional syntax (`syntax` holds the offending condition text)
    InvalidConditional { line: usize, syntax: String },
    /// Invalid loop syntax (`syntax` holds the offending loop header text)
    InvalidLoop { line: usize, syntax: String },
    /// Unclosed comment
    UnclosedComment { line: usize },
    /// Partial reference not found
    PartialNotFound { name: String },
}
52
/// Template validation warning.
///
/// Warnings never make a template invalid; they flag things worth a look.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidationWarning {
    /// Variable is referenced without a default value, so rendering might
    /// error if the caller does not provide it
    VariableMayError { name: String },
}
59
/// Template metadata extracted from header comments.
///
/// Both fields are `None` when the corresponding header comment is absent,
/// which is also what `TemplateMetadata::default()` yields.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct TemplateMetadata {
    /// Template version (from a `{# Version: ... #}` header comment)
    pub version: Option<String>,
    /// Template purpose description (from a `{# PURPOSE: ... #}` header comment)
    pub purpose: Option<String>,
}
68
69/// Extract all variable references from template content.
70///
71/// Returns a list of all `{{VARIABLE}}` references found in the template,
72/// including their line numbers and default values if present.
73pub fn extract_variables(content: &str) -> Vec<VariableInfo> {
74    let mut variables = Vec::new();
75    let bytes = content.as_bytes();
76    let mut i = 0;
77    let mut line = 0;
78
79    while i < bytes.len().saturating_sub(1) {
80        // Track line numbers
81        if bytes[i] == b'\n' {
82            line += 1;
83        }
84
85        // Skip comment blocks
86        if i + 1 < bytes.len() && bytes[i] == b'{' && bytes[i + 1] == b'#' {
87            // Skip to end of comment
88            i += 2;
89            while i + 1 < bytes.len() && !(bytes[i] == b'#' && bytes[i + 1] == b'}') {
90                if bytes[i] == b'\n' {
91                    line += 1;
92                }
93                i += 1;
94            }
95            if i + 1 < bytes.len() {
96                i += 2; // Skip #}
97            }
98            continue;
99        }
100
101        // Check for {{...}} pattern
102        if bytes[i] == b'{' && i + 1 < bytes.len() && bytes[i + 1] == b'{' {
103            i += 2;
104
105            // Skip whitespace after {{
106            while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
107                i += 1;
108            }
109
110            let name_start = i;
111
112            // Find the closing }}
113            while i < bytes.len()
114                && !(bytes[i] == b'}' && i + 1 < bytes.len() && bytes[i + 1] == b'}')
115            {
116                i += 1;
117            }
118
119            if i < bytes.len() && bytes[i] == b'}' && i + 1 < bytes.len() && bytes[i + 1] == b'}' {
120                let var_spec = &content[name_start..i];
121                let trimmed_var = var_spec.trim();
122
123                // Skip partial references {{> partial}}
124                if !trimmed_var.starts_with('>') && !trimmed_var.is_empty() {
125                    // Check for default value syntax
126                    let (var_name, default_value) =
127                        var_spec.find('|').map_or((trimmed_var, None), |pipe_pos| {
128                            let name = var_spec[..pipe_pos].trim();
129                            let rest = &var_spec[pipe_pos + 1..];
130                            rest.find('=').map_or((name, None), |eq_pos| {
131                                let key = rest[..eq_pos].trim();
132                                if key == "default" {
133                                    let value = rest[eq_pos + 1..].trim();
134                                    let value = if (value.starts_with('"') && value.ends_with('"'))
135                                        || (value.starts_with('\'') && value.ends_with('\''))
136                                    {
137                                        &value[1..value.len() - 1]
138                                    } else {
139                                        value
140                                    };
141                                    (name, Some(value.to_string()))
142                                } else {
143                                    (name, None)
144                                }
145                            })
146                        });
147
148                    variables.push(VariableInfo {
149                        name: var_name.to_string(),
150                        line,
151                        has_default: default_value.is_some(),
152                        default_value,
153                    });
154                }
155
156                i += 2;
157                continue;
158            }
159        }
160
161        i += 1;
162    }
163
164    variables
165}
166
/// Extract all partial references from template content.
///
/// Returns the trimmed name of every `{{> partial}}` reference, in order of
/// appearance (duplicates are kept). Spaces and tabs are allowed between `{{`
/// and `>`.
///
/// References inside `{# ... #}` comments are ignored, matching how
/// `extract_variables` treats comments, so a commented-out partial is not
/// reported as required. Empty names (`{{> }}`) are skipped, and an
/// unterminated comment or partial tag ends the scan.
pub fn extract_partials(content: &str) -> Vec<String> {
    let mut partials = Vec::new();
    let mut rest = content;

    while let Some(pos) = rest.find('{') {
        let tail = &rest[pos..];

        if let Some(after) = tail.strip_prefix("{#") {
            // Comment: resume after the closing `#}`.
            match after.find("#}") {
                Some(end) => rest = &after[end + 2..],
                None => break,
            }
        } else if let Some(after) = tail.strip_prefix("{{") {
            let body = after.trim_start_matches(|c: char| c == ' ' || c == '\t');
            match body.strip_prefix('>') {
                Some(spec) => match spec.find("}}") {
                    Some(end) => {
                        let name = spec[..end].trim();
                        if !name.is_empty() {
                            partials.push(name.to_string());
                        }
                        rest = &spec[end + 2..];
                    }
                    None => break,
                },
                // A `{{` that is not a partial: keep scanning right after it so a
                // tag starting immediately afterwards is not skipped.
                None => rest = after,
            }
        } else {
            // Lone `{`: step over it.
            rest = &tail[1..];
        }
    }

    partials
}
221
222/// Extract template metadata from header comments.
223///
224/// Parses structured comments like:
225/// ```text
226/// {# Template: name #}
227/// {# Version: 1.0 #}
228/// {# VARIABLES: VAR1, VAR2 #}
229/// ```
230pub fn extract_metadata(content: &str) -> TemplateMetadata {
231    let mut version = None;
232    let mut purpose = None;
233
234    for line in content.lines().take(50) {
235        // Look for comment markers
236        let line = line.trim();
237        if !line.starts_with("{#") || !line.ends_with("#}") {
238            continue;
239        }
240
241        let inner = line[2..line.len() - 2].trim();
242
243        // Parse Version: x.x
244        if let Some(rest) = inner.strip_prefix("Version:") {
245            version = Some(rest.trim().to_string());
246        } else if let Some(rest) = inner.strip_prefix("PURPOSE:") {
247            // Parse PURPOSE: description
248            purpose = Some(rest.trim().to_string());
249        }
250    }
251
252    TemplateMetadata { version, purpose }
253}
254
255/// Validate a template's syntax and structure.
256///
257/// Checks for:
258/// - Unclosed variable references
259/// - Unclosed conditionals
260/// - Unclosed loops
261/// - Unclosed comments
262/// - Invalid syntax in conditionals and loops
263pub fn validate_syntax(content: &str) -> Vec<ValidationError> {
264    let bytes = content.as_bytes();
265    SyntaxValidator::new(content).validate(bytes)
266}
267
268/// Helper struct for template syntax validation.
269struct SyntaxValidator<'a> {
270    content: &'a str,
271    errors: Vec<ValidationError>,
272    line: usize,
273    i: usize,
274    conditional_stack: Vec<(usize, &'static str)>,
275    loop_stack: Vec<(usize, &'static str)>,
276}
277
278impl<'a> SyntaxValidator<'a> {
279    const fn new(content: &'a str) -> Self {
280        Self {
281            content,
282            errors: Vec::new(),
283            line: 0,
284            i: 0,
285            conditional_stack: Vec::new(),
286            loop_stack: Vec::new(),
287        }
288    }
289
290    fn validate(mut self, bytes: &[u8]) -> Vec<ValidationError> {
291        while self.i < bytes.len() {
292            self.track_newlines(bytes);
293            if self.try_skip_comment(bytes) {
294                continue;
295            }
296            if self.try_parse_conditional(bytes) {
297                continue;
298            }
299            if self.try_parse_loop(bytes) {
300                continue;
301            }
302            self.i += 1;
303        }
304        self.check_unclosed_blocks();
305        self.errors
306    }
307
308    fn track_newlines(&mut self, bytes: &[u8]) {
309        if bytes[self.i] == b'\n' {
310            self.line += 1;
311        }
312    }
313
314    fn try_skip_comment(&mut self, bytes: &[u8]) -> bool {
315        if self.i + 1 < bytes.len() && bytes[self.i] == b'{' && bytes[self.i + 1] == b'#' {
316            let comment_start = self.line;
317            self.i += 2;
318            while self.i + 1 < bytes.len() && !(bytes[self.i] == b'#' && bytes[self.i + 1] == b'}')
319            {
320                if bytes[self.i] == b'\n' {
321                    self.line += 1;
322                }
323                self.i += 1;
324            }
325            if self.i + 1 >= bytes.len() {
326                self.errors.push(ValidationError::UnclosedComment {
327                    line: comment_start,
328                });
329            }
330            if self.i + 1 < bytes.len() {
331                self.i += 2;
332            }
333            true
334        } else {
335            false
336        }
337    }
338
339    fn try_parse_conditional(&mut self, bytes: &[u8]) -> bool {
340        // Check for {% if ... %}
341        if self.i + 5 < bytes.len()
342            && bytes[self.i] == b'{'
343            && bytes[self.i + 1] == b'%'
344            && bytes[self.i + 2] == b' '
345            && bytes[self.i + 3] == b'i'
346            && bytes[self.i + 4] == b'f'
347            && bytes[self.i + 5] == b' '
348        {
349            let if_start = self.i;
350            self.i += 6;
351            while self.i + 1 < bytes.len() && !(bytes[self.i] == b'%' && bytes[self.i + 1] == b'}')
352            {
353                self.i += 1;
354            }
355            if self.i + 1 >= bytes.len() {
356                self.errors
357                    .push(ValidationError::UnclosedConditional { line: self.line });
358            } else {
359                let condition = self.content[if_start + 6..self.i].trim();
360                if condition.is_empty() || condition.contains('{') || condition.contains('}') {
361                    self.errors.push(ValidationError::InvalidConditional {
362                        line: self.line,
363                        syntax: condition.to_string(),
364                    });
365                }
366                self.conditional_stack.push((self.line, "if"));
367                self.i += 2;
368            }
369            return true;
370        }
371
372        // Check for {% endif %}
373        if self.i + 9 < bytes.len()
374            && bytes[self.i] == b'{'
375            && bytes[self.i + 1] == b'%'
376            && bytes[self.i + 2] == b' '
377            && bytes[self.i + 3] == b'e'
378            && bytes[self.i + 4] == b'n'
379            && bytes[self.i + 5] == b'd'
380            && bytes[self.i + 6] == b'i'
381            && bytes[self.i + 7] == b'f'
382            && bytes[self.i + 8] == b' '
383            && bytes[self.i + 9] == b'%'
384        {
385            self.conditional_stack.pop();
386            self.i += 11;
387            return true;
388        }
389
390        false
391    }
392
393    fn try_parse_loop(&mut self, bytes: &[u8]) -> bool {
394        // Check for {% for ... %}
395        if self.i + 6 < bytes.len()
396            && bytes[self.i] == b'{'
397            && bytes[self.i + 1] == b'%'
398            && bytes[self.i + 2] == b' '
399            && bytes[self.i + 3] == b'f'
400            && bytes[self.i + 4] == b'o'
401            && bytes[self.i + 5] == b'r'
402            && bytes[self.i + 6] == b' '
403        {
404            let for_start = self.i;
405            self.i += 7;
406            while self.i + 1 < bytes.len() && !(bytes[self.i] == b'%' && bytes[self.i + 1] == b'}')
407            {
408                self.i += 1;
409            }
410            if self.i + 1 >= bytes.len() {
411                self.errors
412                    .push(ValidationError::UnclosedLoop { line: self.line });
413            } else {
414                let condition = self.content[for_start + 7..self.i].trim();
415                if !condition.contains(" in ") || condition.split(" in ").count() != 2 {
416                    self.errors.push(ValidationError::InvalidLoop {
417                        line: self.line,
418                        syntax: condition.to_string(),
419                    });
420                }
421                self.loop_stack.push((self.line, "for"));
422                self.i += 2;
423            }
424            return true;
425        }
426
427        // Check for {% endfor %}
428        if self.i + 10 < bytes.len()
429            && bytes[self.i] == b'{'
430            && bytes[self.i + 1] == b'%'
431            && bytes[self.i + 2] == b' '
432            && bytes[self.i + 3] == b'e'
433            && bytes[self.i + 4] == b'n'
434            && bytes[self.i + 5] == b'd'
435            && bytes[self.i + 6] == b'f'
436            && bytes[self.i + 7] == b'o'
437            && bytes[self.i + 8] == b'r'
438            && bytes[self.i + 9] == b' '
439        {
440            self.loop_stack.pop();
441            self.i += 12;
442            return true;
443        }
444
445        false
446    }
447
448    fn check_unclosed_blocks(&mut self) {
449        if let Some((line, _)) = self.conditional_stack.first() {
450            self.errors
451                .push(ValidationError::UnclosedConditional { line: *line });
452        }
453        if let Some((line, _)) = self.loop_stack.first() {
454            self.errors
455                .push(ValidationError::UnclosedLoop { line: *line });
456        }
457    }
458}
459
460/// Validate a complete template.
461///
462/// Performs comprehensive validation including syntax checking,
463/// variable extraction, and partial reference validation.
464pub fn validate_template(content: &str, available_partials: &HashSet<String>) -> ValidationResult {
465    let mut is_valid = true;
466    let mut errors = Vec::new();
467    let mut warnings = Vec::new();
468
469    // Validate syntax
470    let syntax_errors = validate_syntax(content);
471    if !syntax_errors.is_empty() {
472        is_valid = false;
473        errors.extend(syntax_errors);
474    }
475
476    // Extract variables
477    let variables = extract_variables(content);
478
479    // Extract partials
480    let partials = extract_partials(content);
481
482    // Check for missing partials
483    for partial in &partials {
484        if !available_partials.contains(partial) {
485            is_valid = false;
486            errors.push(ValidationError::PartialNotFound {
487                name: partial.clone(),
488            });
489        }
490    }
491
492    // Check for variables without defaults that might error
493    for var in &variables {
494        if !var.has_default {
495            warnings.push(ValidationWarning::VariableMayError {
496                name: var.name.clone(),
497            });
498        }
499    }
500
501    ValidationResult {
502        is_valid,
503        variables,
504        partials,
505        errors,
506        warnings,
507    }
508}
509
// Unit tests for variable/partial/metadata extraction and template validation.
#[cfg(test)]
mod tests {
    use super::*;

    // --- extract_variables ---

    #[test]
    fn test_extract_simple_variable() {
        let content = "Hello {{NAME}}";
        let vars = extract_variables(content);
        assert_eq!(vars.len(), 1);
        assert_eq!(vars[0].name, "NAME");
        assert!(!vars[0].has_default);
    }

    // Whitespace around the name inside the braces is not part of the name.
    #[test]
    fn test_extract_variable_with_whitespace() {
        let content = "Value: {{ VALUE }}";
        let vars = extract_variables(content);
        assert_eq!(vars.len(), 1);
        assert_eq!(vars[0].name, "VALUE");
    }

    // Double-quoted default: the quotes are stripped from the stored value.
    #[test]
    fn test_extract_variable_with_default() {
        let content = "Hello {{NAME|default=\"Guest\"}}";
        let vars = extract_variables(content);
        assert_eq!(vars.len(), 1);
        assert_eq!(vars[0].name, "NAME");
        assert!(vars[0].has_default);
        assert_eq!(vars[0].default_value, Some("Guest".to_string()));
    }

    // Single-quoted default: handled the same way as double quotes.
    #[test]
    fn test_extract_variable_with_default_single_quotes() {
        let content = "Hello {{NAME|default='Guest'}}";
        let vars = extract_variables(content);
        assert_eq!(vars.len(), 1);
        assert_eq!(vars[0].default_value, Some("Guest".to_string()));
    }

    // --- extract_partials ---

    #[test]
    fn test_extract_partials() {
        let content = "{{> shared/_header}}\nContent";
        let partials = extract_partials(content);
        assert_eq!(partials.len(), 1);
        assert_eq!(partials[0], "shared/_header");
    }

    #[test]
    fn test_extract_multiple_partials() {
        let content = "{{> header}}\n{{> footer}}";
        let partials = extract_partials(content);
        assert_eq!(partials.len(), 2);
    }

    // --- validate_syntax ---

    // Plain variable references are not a syntax concern.
    #[test]
    fn test_validate_syntax_valid() {
        let content = "Hello {{NAME}}";
        let errors = validate_syntax(content);
        assert!(errors.is_empty());
    }

    #[test]
    fn test_validate_syntax_unclosed_comment() {
        let content = "Hello {# unclosed comment\nworld";
        let errors = validate_syntax(content);
        assert!(!errors.is_empty());
        assert!(matches!(errors[0], ValidationError::UnclosedComment { .. }));
    }

    #[test]
    fn test_validate_conditional_valid() {
        let content = "{% if NAME %}Hello{% endif %}";
        let errors = validate_syntax(content);
        assert!(errors.is_empty());
    }

    #[test]
    fn test_validate_loop_valid() {
        let content = "{% for item in ITEMS %}{{item}}{% endfor %}";
        let errors = validate_syntax(content);
        assert!(errors.is_empty());
    }

    // A loop header without " in " is rejected.
    #[test]
    fn test_validate_loop_invalid_syntax() {
        let content = "{% for item ITEMS %}{{item}}{% endfor %}";
        let errors = validate_syntax(content);
        assert!(!errors.is_empty());
        assert!(matches!(errors[0], ValidationError::InvalidLoop { .. }));
    }

    // --- extract_metadata ---

    // Only the Version and PURPOSE header comments are asserted on here.
    #[test]
    fn test_extract_metadata() {
        let content = r"{# Template: test.txt #}
{# Version: 1.0 #}
{# PURPOSE: Test template #}
{# VARIABLES: {{NAME}}, {{AGE}} #}
Content here";

        let metadata = extract_metadata(content);
        assert_eq!(metadata.version, Some("1.0".to_string()));
        assert_eq!(metadata.purpose, Some("Test template".to_string()));
    }

    // --- validate_template ---

    #[test]
    fn test_validate_template_complete() {
        let content = "Hello {{NAME|default=\"Guest\"}}";
        let partials = HashSet::new();
        let result = validate_template(content, &partials);

        assert!(result.is_valid);
        assert_eq!(result.variables.len(), 1);
        assert!(result.errors.is_empty());
    }

    // A partial that is not in the available set makes the template invalid.
    #[test]
    fn test_validate_template_with_missing_partial() {
        let content = "{{> missing_partial}}";
        let partials = HashSet::new();
        let result = validate_template(content, &partials);

        assert!(!result.is_valid);
        assert!(!result.errors.is_empty());
    }

    // --- interactions between constructs ---

    // Variable references inside {# ... #} comments are not reported.
    #[test]
    fn test_skip_variables_in_comments() {
        let content = "{# This is a comment with {{VARIABLE}} #}\nHello {{NAME}}";
        let vars = extract_variables(content);
        assert_eq!(vars.len(), 1);
        assert_eq!(vars[0].name, "NAME");
    }

    // Partial references are not reported as variables.
    #[test]
    fn test_skip_partials_in_variable_extraction() {
        let content = "{{> partial}}\n{{NAME}}";
        let vars = extract_variables(content);
        assert_eq!(vars.len(), 1);
        assert_eq!(vars[0].name, "NAME");
    }

    // The name in the `{% if %}` header is not counted, only the `{{NAME}}` output tag.
    #[test]
    fn test_extract_variables_from_conditional() {
        let content = "{% if NAME %}Hello {{NAME}}{% endif %}";
        let vars = extract_variables(content);
        assert_eq!(vars.len(), 1); // Only NAME in output is extracted
    }
}