sherpack_convert/
macro_processor.rs

1//! Macro variable scoping post-processor
2//!
3//! This module resolves bare variable references inside macro definitions to their
4//! fully qualified paths (e.g., `chroot` → `values.controller.image.chroot`).
5//!
6//! The problem arises from Helm's `with` blocks that set a context:
7//! ```go
8//! {{- with .Values.controller.image }}
9//!   {{- if .chroot }}...
10//! {{- end }}
11//! ```
12//!
13//! When converted to Jinja2 macros, the context is lost and variables become bare.
14//! This post-processor fixes that by searching values.yaml for matching keys.
15
16use regex::Regex;
17use serde_yaml::Value;
18use std::collections::HashMap;
19
/// A macro definition located inside a template.
///
/// Instances are produced by [`MacroPostProcessor::extract_macros`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MacroDefinition {
    /// Macro name as written after the `macro` keyword.
    pub name: String,
    /// Text between the opening `macro` tag and the closing `endmacro` tag.
    pub body: String,
    /// Byte offset in the scanned template at which the opening tag starts.
    pub start_offset: usize,
    /// Byte offset in the scanned template just past the closing `endmacro` tag.
    pub end_offset: usize,
}
28
/// Outcome of looking up a bare variable name among the known value paths.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ResolvedVariable {
    /// Exactly one path matched; the payload is that fully qualified path.
    Unique(String),
    /// Several paths matched; the payload lists the candidates so the caller
    /// can report them for manual resolution.
    Ambiguous(Vec<String>),
    /// No path matched the variable name.
    NotFound,
}
39
/// Post-processor for macro variable scoping.
///
/// Holds an index from every key name seen in the values document to all the
/// fully qualified paths at which that key occurs. The `Default` value has an
/// empty index, so every lookup on it reports "not found".
#[derive(Debug, Clone, Default)]
pub struct MacroPostProcessor {
    /// Map of variable name to all paths where it appears in values.yaml
    variable_paths: HashMap<String, Vec<String>>,
}
46
47impl MacroPostProcessor {
48    /// Create a new processor from values.yaml content
49    pub fn from_yaml(yaml_content: &str) -> Result<Self, serde_yaml::Error> {
50        let value: Value = serde_yaml::from_str(yaml_content)?;
51        let mut variable_paths = HashMap::new();
52        Self::collect_paths(&value, "values", &mut variable_paths);
53        Ok(Self { variable_paths })
54    }
55
56    /// Create from a pre-parsed YAML value
57    pub fn from_value(value: &Value) -> Self {
58        let mut variable_paths = HashMap::new();
59        Self::collect_paths(value, "values", &mut variable_paths);
60        Self { variable_paths }
61    }
62
63    /// Recursively collect all variable paths from the YAML structure
64    fn collect_paths(value: &Value, current_path: &str, paths: &mut HashMap<String, Vec<String>>) {
65        match value {
66            Value::Mapping(map) => {
67                for (key, val) in map {
68                    if let Value::String(key_str) = key {
69                        let new_path = format!("{}.{}", current_path, key_str);
70                        // Record this key's full path
71                        paths
72                            .entry(key_str.clone())
73                            .or_default()
74                            .push(new_path.clone());
75                        // Recurse into nested values
76                        Self::collect_paths(val, &new_path, paths);
77                    }
78                }
79            }
80            Value::Sequence(seq) => {
81                for (idx, val) in seq.iter().enumerate() {
82                    let new_path = format!("{}[{}]", current_path, idx);
83                    Self::collect_paths(val, &new_path, paths);
84                }
85            }
86            _ => {}
87        }
88    }
89
90    /// Resolve a bare variable name to its full path
91    pub fn resolve(&self, variable: &str) -> ResolvedVariable {
92        match self.variable_paths.get(variable) {
93            Some(paths) if paths.len() == 1 => ResolvedVariable::Unique(paths[0].clone()),
94            Some(paths) if paths.len() > 1 => ResolvedVariable::Ambiguous(paths.clone()),
95            _ => ResolvedVariable::NotFound,
96        }
97    }
98
99    /// Resolve a variable with a hint about the expected parent path segment
100    ///
101    /// For example, if the variable is `chroot` and the hint is `image`,
102    /// prefer `values.controller.image.chroot` over other paths.
103    pub fn resolve_with_hint(&self, variable: &str, hint: &str) -> ResolvedVariable {
104        match self.variable_paths.get(variable) {
105            Some(paths) if paths.len() == 1 => ResolvedVariable::Unique(paths[0].clone()),
106            Some(paths) if paths.len() > 1 => {
107                // The full path should end with {hint}.{variable}
108                let expected_suffix = format!(".{}.{}", hint, variable);
109
110                let matching: Vec<_> = paths
111                    .iter()
112                    .filter(|p| p.ends_with(&expected_suffix))
113                    .cloned()
114                    .collect();
115
116                match matching.len() {
117                    1 => ResolvedVariable::Unique(matching[0].clone()),
118                    0 => {
119                        // Fall back to containing the hint anywhere in path
120                        let fallback: Vec<_> = paths
121                            .iter()
122                            .filter(|p| p.contains(&format!(".{}.", hint)))
123                            .cloned()
124                            .collect();
125                        match fallback.len() {
126                            1 => ResolvedVariable::Unique(fallback[0].clone()),
127                            0 => ResolvedVariable::Ambiguous(paths.clone()),
128                            _ => ResolvedVariable::Ambiguous(fallback),
129                        }
130                    }
131                    _ => ResolvedVariable::Ambiguous(matching),
132                }
133            }
134            _ => ResolvedVariable::NotFound,
135        }
136    }
137
138    /// Extract all macro definitions from a template
139    pub fn extract_macros(content: &str) -> Vec<MacroDefinition> {
140        let mut macros = Vec::new();
141        // Match {%- macro name(...) %}...{%- endmacro %}
142        let macro_re =
143            Regex::new(r"(?s)\{%-?\s*macro\s+(\w+)\s*\([^)]*\)\s*%\}(.*?)\{%-?\s*endmacro\s*%\}")
144                .unwrap();
145
146        for caps in macro_re.captures_iter(content) {
147            let full_match = caps.get(0).unwrap();
148            macros.push(MacroDefinition {
149                name: caps[1].to_string(),
150                body: caps[2].to_string(),
151                start_offset: full_match.start(),
152                end_offset: full_match.end(),
153            });
154        }
155
156        macros
157    }
158
159    /// Find bare variable references in a macro body
160    ///
161    /// Returns variable names that are:
162    /// - Not prefixed with `values.`, `release.`, `pack.`, `capabilities.`, `_with_ctx.`
163    /// - Not Jinja2 keywords or builtin functions
164    /// - Inside expressions ({{ }}) or control structures ({% %})
165    /// - Not loop or set variable declarations
166    pub fn find_bare_variables(macro_body: &str) -> Vec<String> {
167        let mut bare_vars = Vec::new();
168
169        // Known prefixes for qualified variables
170        let qualified_prefixes = [
171            "values.",
172            "release.",
173            "pack.",
174            "capabilities.",
175            "_with_ctx.",
176            "loop.",
177            "item.",
178            "key.",
179            "value.",
180            "self.",
181        ];
182
183        // Known Jinja2/template keywords and builtins to ignore
184        let keywords = [
185            "true",
186            "false",
187            "none",
188            "null",
189            "and",
190            "or",
191            "not",
192            "in",
193            "is",
194            "if",
195            "else",
196            "elif",
197            "endif",
198            "for",
199            "endfor",
200            "set",
201            "endset",
202            "macro",
203            "endmacro",
204            "import",
205            "from",
206            "include",
207            "block",
208            "endblock",
209            "extends",
210            "call",
211            "filter",
212            "raw",
213            "endraw",
214            "with",
215            "endwith",
216            // Common filter/function names
217            "nindent",
218            "indent",
219            "quote",
220            "toyaml",
221            "tojson",
222            "trunc",
223            "default",
224            "trimsuffix",
225            "trimprefix",
226            "replace",
227            "lower",
228            "upper",
229            "title",
230            "dictsort",
231            "merge",
232            "tpl",
233            "toString",
234            "semver_match",
235            "b64encode",
236            "len",
237        ];
238
239        // First, find all loop and set variable declarations to exclude them
240        // Pattern: {% for VAR, VAR in ... %} or {% set VAR = ... %}
241        let mut declared_vars: Vec<String> = Vec::new();
242
243        let for_vars_pattern = Regex::new(r"\{%-?\s*for\s+(\w+)(?:\s*,\s*(\w+))?\s+in\s+").unwrap();
244
245        let set_pattern = Regex::new(r"\{%-?\s*set\s+(\w+)\s*=").unwrap();
246
247        for caps in for_vars_pattern.captures_iter(macro_body) {
248            declared_vars.push(caps[1].to_string());
249            if let Some(m) = caps.get(2) {
250                declared_vars.push(m.as_str().to_string());
251            }
252        }
253
254        for caps in set_pattern.captures_iter(macro_body) {
255            declared_vars.push(caps[1].to_string());
256        }
257
258        // Match variable references in expressions
259        // Look for: {{ var }}, {{ var | filter }}, {{ var.something }}, {% if var %}
260        let var_pattern = Regex::new(
261            r"\{\{[^}]*?([a-zA-Z_][a-zA-Z0-9_]*)\s*(?:[|}\.]|\s*\)|\s*~|\s*==|\s*!=|\s*<|\s*>|\s*and|\s*or|\s*%\})"
262        ).unwrap();
263
264        let control_pattern = Regex::new(
265            r"\{%[^%]*?(?:if|elif)\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*(?:[|%}\.]|\s*\)|\s*~|\s*==|\s*!=)",
266        )
267        .unwrap();
268
269        // Also catch standalone variable references in various contexts
270        let standalone_pattern = Regex::new(
271            r"(?:^|[^a-zA-Z0-9_\.])([a-zA-Z_][a-zA-Z0-9_]*)(?:\s*\||\s*\}|\s*~|\s*==|\s*!=|\s*<|\s*>|\s+and|\s+or|\s*\))"
272        ).unwrap();
273
274        for pattern in [&var_pattern, &control_pattern, &standalone_pattern] {
275            for caps in pattern.captures_iter(macro_body) {
276                let var_name = caps[1].to_string();
277
278                // Skip if it's a keyword
279                if keywords.contains(&var_name.to_lowercase().as_str()) {
280                    continue;
281                }
282
283                // Skip if it's a declared loop/set variable
284                if declared_vars.contains(&var_name) {
285                    continue;
286                }
287
288                // Check if this is a qualified variable (has a prefix before it)
289                let var_pos = caps.get(1).unwrap().start();
290                let before = &macro_body[..var_pos];
291                let is_qualified = qualified_prefixes.iter().any(|prefix| {
292                    before.ends_with(prefix)
293                        || before.ends_with(&format!("({})", prefix.trim_end_matches('.')))
294                });
295
296                if !is_qualified && !bare_vars.contains(&var_name) {
297                    bare_vars.push(var_name);
298                }
299            }
300        }
301
302        bare_vars
303    }
304
305    /// Process a template, resolving bare variables in macros
306    ///
307    /// Returns the processed content and a list of variables that couldn't be resolved
308    pub fn process(&self, content: &str) -> (String, Vec<UnresolvedVariable>) {
309        let mut result = content.to_string();
310        let mut unresolved = Vec::new();
311
312        let macros = Self::extract_macros(content);
313
314        for macro_def in macros {
315            let bare_vars = Self::find_bare_variables(&macro_def.body);
316
317            // Derive hints from macro name, ordered by specificity
318            let hints = Self::derive_hints(&macro_def.name);
319
320            for var in bare_vars {
321                // Try each hint in order until we find a unique resolution
322                let mut resolution = self.resolve(&var);
323
324                for hint in &hints {
325                    match &resolution {
326                        ResolvedVariable::Unique(_) => break, // Already resolved
327                        _ => {
328                            resolution = self.resolve_with_hint(&var, hint);
329                        }
330                    }
331                }
332
333                match resolution {
334                    ResolvedVariable::Unique(full_path) => {
335                        // Replace this variable in the macro body
336                        result =
337                            Self::replace_bare_variable(&result, &macro_def.name, &var, &full_path);
338                    }
339                    ResolvedVariable::Ambiguous(paths) => {
340                        unresolved.push(UnresolvedVariable {
341                            variable: var,
342                            macro_name: macro_def.name.clone(),
343                            candidates: paths,
344                            reason: "Multiple matching paths found".to_string(),
345                        });
346                    }
347                    ResolvedVariable::NotFound => {
348                        // This might be a local variable or macro parameter - don't report
349                    }
350                }
351            }
352        }
353
354        (result, unresolved)
355    }
356
357    /// Derive hints from the macro name, ordered by specificity
358    ///
359    /// Returns a list of hints to try, from most specific to least specific
360    fn derive_hints(macro_name: &str) -> Vec<String> {
361        let name_lower = macro_name.to_lowercase();
362        let mut hints = Vec::new();
363
364        // Common patterns for ingress-nginx and similar charts
365        // More specific hints come first
366
367        // Controller-related patterns
368        if name_lower.contains("controller") {
369            if name_lower.contains("image") {
370                hints.push("controller.image".to_string());
371            }
372            hints.push("controller".to_string());
373        }
374
375        // Standalone image macros typically refer to controller.image
376        if name_lower.contains("image") || name_lower.contains("digest") {
377            hints.push("controller.image".to_string());
378            hints.push("image".to_string());
379        }
380
381        // Backend-related patterns
382        if name_lower.contains("backend") {
383            if name_lower.contains("image") {
384                hints.push("defaultBackend.image".to_string());
385            }
386            hints.push("defaultBackend".to_string());
387        }
388
389        // Webhook-related patterns
390        if name_lower.contains("webhook") {
391            hints.push("admissionWebhooks".to_string());
392        }
393
394        hints
395    }
396
397    /// Replace a bare variable with its qualified path within a specific macro
398    fn replace_bare_variable(
399        content: &str,
400        macro_name: &str,
401        var: &str,
402        full_path: &str,
403    ) -> String {
404        // Build a regex that matches the macro and replaces the variable inside it
405        let macro_pattern = format!(
406            r"(?s)(\{{% *-? *macro {} *\([^)]*\) *%\}})(.*?)(\{{% *-? *endmacro *%\}})",
407            regex::escape(macro_name)
408        );
409
410        let macro_re = Regex::new(&macro_pattern).unwrap();
411
412        macro_re
413            .replace(content, |caps: &regex::Captures| {
414                let prefix = &caps[1];
415                let body = &caps[2];
416                let suffix = &caps[3];
417
418                // Replace bare variable with qualified path in the body
419                let new_body = Self::replace_bare_in_body(body, var, full_path);
420
421                format!("{}{}{}", prefix, new_body, suffix)
422            })
423            .to_string()
424    }
425
426    /// Replace a bare variable in a macro body, being careful about context
427    fn replace_bare_in_body(body: &str, var: &str, full_path: &str) -> String {
428        // We need to be careful to only replace variables in expression contexts,
429        // not inside string literals like "-chroot" or 'value'
430        //
431        // Strategy: Process the body segment by segment, tracking whether we're
432        // inside a string literal or not
433
434        let mut result = String::with_capacity(body.len() + 100);
435        let chars: Vec<char> = body.chars().collect();
436        let mut i = 0;
437        let var_chars: Vec<char> = var.chars().collect();
438
439        while i < chars.len() {
440            let ch = chars[i];
441
442            // Track string literals - skip content inside quotes
443            if ch == '"' || ch == '\'' {
444                let quote = ch;
445                result.push(ch);
446                i += 1;
447                // Copy everything until closing quote
448                while i < chars.len() && chars[i] != quote {
449                    result.push(chars[i]);
450                    i += 1;
451                }
452                if i < chars.len() {
453                    result.push(chars[i]); // closing quote
454                    i += 1;
455                }
456                continue;
457            }
458
459            // Check if we're at a potential variable match
460            if Self::is_var_match(&chars, i, &var_chars) {
461                let before_idx = if i > 0 { i - 1 } else { 0 };
462                let after_idx = i + var_chars.len();
463
464                // Check character before - should not be alphanumeric, underscore, or dot
465                let valid_before = i == 0 || {
466                    let bc = chars[before_idx];
467                    !bc.is_alphanumeric() && bc != '_' && bc != '.'
468                };
469
470                // Check character after - should not be alphanumeric or underscore
471                let valid_after = after_idx >= chars.len() || {
472                    let ac = chars[after_idx];
473                    !ac.is_alphanumeric() && ac != '_'
474                };
475
476                if valid_before && valid_after {
477                    // Replace with full path
478                    result.push_str(full_path);
479                    i += var_chars.len();
480                    continue;
481                }
482            }
483
484            result.push(ch);
485            i += 1;
486        }
487
488        result
489    }
490
491    /// Check if chars starting at position i match the variable
492    fn is_var_match(chars: &[char], i: usize, var_chars: &[char]) -> bool {
493        if i + var_chars.len() > chars.len() {
494            return false;
495        }
496        for (j, vc) in var_chars.iter().enumerate() {
497            if chars[i + j] != *vc {
498                return false;
499            }
500        }
501        true
502    }
503}
504
/// Information about a variable that couldn't be resolved.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UnresolvedVariable {
    /// The bare variable name as it appears in the macro body.
    pub variable: String,
    /// Name of the macro in which the variable was found.
    pub macro_name: String,
    /// Fully qualified paths that the variable could refer to.
    pub candidates: Vec<String>,
    /// Human-readable explanation of why resolution failed.
    pub reason: String,
}
513
#[cfg(test)]
mod tests {
    use super::*;

    /// Small values document: `chroot`, `digest` and `digestChroot` occur
    /// once, while `image` and `tag` occur under two components.
    const TEST_VALUES: &str = r#"
controller:
  name: controller
  image:
    chroot: false
    image: ingress-nginx/controller
    tag: "v1.14.1"
    digest: sha256:abc123
    digestChroot: sha256:def456
defaultBackend:
  image:
    image: defaultbackend
    tag: "1.5"
"#;

    /// Processor indexed from [`TEST_VALUES`].
    fn fixture() -> MacroPostProcessor {
        MacroPostProcessor::from_yaml(TEST_VALUES).unwrap()
    }

    #[test]
    fn test_from_yaml() {
        let processor = fixture();
        for key in ["chroot", "digest", "image"] {
            assert!(processor.variable_paths.contains_key(key));
        }
    }

    #[test]
    fn test_resolve_unique() {
        let processor = fixture();

        // Key that exists only under controller.image.
        if let ResolvedVariable::Unique(path) = processor.resolve("chroot") {
            assert_eq!(path, "values.controller.image.chroot");
        } else {
            panic!("Expected unique resolution for chroot");
        }

        // Another key that exists only under controller.image.
        if let ResolvedVariable::Unique(path) = processor.resolve("digestChroot") {
            assert_eq!(path, "values.controller.image.digestChroot");
        } else {
            panic!("Expected unique resolution for digestChroot");
        }
    }

    #[test]
    fn test_resolve_ambiguous() {
        let processor = fixture();

        // `image` is a key under both components (and names a mapping too).
        let paths = match processor.resolve("image") {
            ResolvedVariable::Ambiguous(paths) => paths,
            _ => panic!("Expected ambiguous resolution for image"),
        };

        assert!(paths.len() >= 2);
        for fragment in ["controller.image.image", "defaultBackend.image.image"] {
            assert!(paths.iter().any(|p| p.contains(fragment)));
        }
    }

    #[test]
    fn test_resolve_with_hint() {
        let processor = fixture();

        // The hint selects the controller's copy of the ambiguous key.
        if let ResolvedVariable::Unique(path) =
            processor.resolve_with_hint("image", "controller.image")
        {
            assert_eq!(path, "values.controller.image.image");
        } else {
            panic!("Expected unique resolution with hint");
        }
    }

    #[test]
    fn test_extract_macros() {
        let content = r#"
{%- macro image() %}
{{- (image ~ "-chroot") -}}
{%- endmacro %}

{%- macro imageDigest() %}
{%- if chroot %}
{{- ("@" ~ digestChroot) -}}
{%- endif %}
{%- endmacro %}
"#;

        let names: Vec<String> = MacroPostProcessor::extract_macros(content)
            .into_iter()
            .map(|definition| definition.name)
            .collect();

        assert_eq!(names.len(), 2);
        assert_eq!(names[0], "image");
        assert_eq!(names[1], "imageDigest");
    }

    #[test]
    fn test_find_bare_variables() {
        let macro_body = r#"
{%- if chroot %}
{{- (image ~ "-chroot") -}}
{%- else %}
{{- (image) -}}
{%- endif %}
"#;

        let found = MacroPostProcessor::find_bare_variables(macro_body);
        for expected in ["chroot", "image"] {
            assert!(found.contains(&expected.to_string()));
        }
    }

    #[test]
    fn test_process_full() {
        let content = r#"
{%- macro imageDigest() %}
{%- if chroot %}
{{- ("@" ~ digestChroot) -}}
{%- endif %}
{%- endmacro %}
"#;

        let (processed, unresolved) = fixture().process(content);

        // Both bare names have a single known path and must be qualified.
        for qualified in [
            "values.controller.image.chroot",
            "values.controller.image.digestChroot",
        ] {
            assert!(processed.contains(qualified));
        }
        // Nothing may be left for manual resolution.
        assert!(
            unresolved.is_empty(),
            "Unexpected unresolved: {:?}",
            unresolved
        );
    }

    #[test]
    fn test_no_false_positives() {
        let content = r#"
{%- macro test() %}
{%- set local_var = "test" %}
{{- local_var | upper -}}
{{- values.controller.name -}}
{%- endmacro %}
"#;

        let (processed, _) = fixture().process(content);

        // Qualified references and `set` locals survive untouched...
        assert!(processed.contains("values.controller.name"));
        assert!(processed.contains("local_var"));
        // ...and no path is qualified a second time.
        assert!(!processed.contains("values.values."));
    }

    #[test]
    fn test_replace_bare_variable() {
        let content = r#"{%- macro image() %}
{%- if chroot %}
test
{%- endif %}
{%- endmacro %}"#;

        let rewritten = MacroPostProcessor::replace_bare_variable(
            content,
            "image",
            "chroot",
            "values.controller.image.chroot",
        );

        assert!(rewritten.contains("values.controller.image.chroot"));
        assert!(!rewritten.contains(" chroot "));
    }
}