ghostscope_compiler/script/
format_validator.rs

1//! Format string validation for print statements
2//!
3//! This module validates format strings and their arguments during compilation,
4//! ensuring correct placeholder count and syntax.
5
6use crate::script::ast::Expr;
7use crate::script::parser::ParseError;
8
9pub struct FormatValidator;
10
11impl FormatValidator {
12    /// Validate that format string placeholders match the number of arguments
13    pub fn validate_format_arguments(format: &str, args: &[Expr]) -> Result<(), ParseError> {
14        let (placeholders, star_extras) = Self::count_required_args(format)?;
15        let required_args = placeholders + star_extras;
16
17        if required_args != args.len() {
18            let args_len = args.len();
19            return Err(ParseError::TypeError(format!(
20                "Format string '{format}' expects {required_args} argument(s) but received {args_len} argument(s)"
21            )));
22        }
23
24        // TODO (phase 2): validate expression types against format specifiers
25        // e.g., {:x} requires integer or pointer; {:s} requires char*/bytes
26
27        Ok(())
28    }
29
30    /// Count the number of placeholders in a format string
31    /// Supports basic {} placeholders and escape sequences {{, }}
32    /// Extended: supports {:x}, {:X}, {:p}, {:s}, and optional length suffixes .N or .*
33    /// Returns (placeholders, star_extras) where star_extras is the number of additional
34    /// dynamic-length arguments required by `.*` occurrences.
35    fn count_required_args(format: &str) -> Result<(usize, usize), ParseError> {
36        let mut placeholders = 0usize;
37        let mut star_extras = 0usize;
38        let mut chars = format.chars().peekable();
39
40        while let Some(ch) = chars.next() {
41            match ch {
42                '{' => {
43                    if chars.peek() == Some(&'{') {
44                        chars.next(); // Skip escaped '{{'
45                    } else {
46                        // Found a placeholder, look for closing '}'
47                        let mut found_closing = false;
48                        let mut placeholder_content = String::new();
49
50                        for inner_ch in chars.by_ref() {
51                            if inner_ch == '}' {
52                                found_closing = true;
53                                break;
54                            }
55                            placeholder_content.push(inner_ch);
56                        }
57
58                        if !found_closing {
59                            return Err(ParseError::InvalidExpression);
60                        }
61
62                        // Accept: empty "{}" or extended forms like ":x", ":X", ":p", ":s", optionally with
63                        // a length suffix ".N" (digits) or ".*" (dynamic length consumes one extra argument)
64                        if placeholder_content.is_empty() {
65                            placeholders += 1;
66                        } else {
67                            // Must start with ':'
68                            if !placeholder_content.starts_with(':') {
69                                return Err(ParseError::TypeError(format!(
70                        "Invalid format specifier '{{{placeholder_content}}}': expected ':' prefix"
71                    )));
72                            }
73                            // Extract conv and optional suffix
74                            let tail = &placeholder_content[1..];
75                            // conv is first char
76                            let mut iter = tail.chars();
77                            let conv = iter.next().ok_or_else(|| {
78                                ParseError::TypeError("Empty format after ':'".to_string())
79                            })?;
80                            match conv {
81                                'x' | 'X' | 'p' | 's' => {}
82                                _ => {
83                                    return Err(ParseError::TypeError(format!(
84                                        "Unsupported format conversion '{{:{conv}}}'"
85                                    )));
86                                }
87                            }
88                            // Remaining should be empty or ".N" or ".*" or ".name$" (capture variable)
89                            let rest: String = iter.collect();
90                            if rest.is_empty() {
91                                // ok
92                            } else if let Some(rem) = rest.strip_prefix('.') {
93                                if rem == "*" {
94                                    star_extras += 1; // dynamic length consumes next arg
95                                } else if let Some(name) = rem.strip_suffix('$') {
96                                    // capture variable name: [A-Za-z_][A-Za-z0-9_]*$
97                                    let mut chars = name.chars();
98                                    let valid = if let Some(first) = chars.next() {
99                                        (first.is_ascii_alphabetic() || first == '_')
100                                            && chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
101                                    } else {
102                                        false
103                                    };
104                                    if !valid {
105                                        return Err(ParseError::TypeError(format!(
106                                            "Invalid capture variable in specifier '{{:{conv}.{rem}}}'"
107                                        )));
108                                    }
109                                } else if rem.chars().all(|c| c.is_ascii_digit())
110                                    || (rem.starts_with("0x")
111                                        && rem.len() > 2
112                                        && rem[2..].chars().all(|c| c.is_ascii_hexdigit()))
113                                    || (rem.starts_with("0o")
114                                        && rem.len() > 2
115                                        && rem[2..].chars().all(|c| matches!(c, '0'..='7')))
116                                    || (rem.starts_with("0b")
117                                        && rem.len() > 2
118                                        && rem[2..].chars().all(|c| matches!(c, '0' | '1')))
119                                {
120                                    // static length with base support: decimal / 0x.. / 0o.. / 0b..
121                                } else {
122                                    return Err(ParseError::TypeError(format!(
123                                        "Invalid length in specifier '{{:{conv}{rest}}}'"
124                                    )));
125                                }
126                            } else {
127                                return Err(ParseError::TypeError(format!(
128                                    "Invalid specifier syntax '{{:{conv}{rest}}}'"
129                                )));
130                            }
131                            placeholders += 1;
132                        }
133                    }
134                }
135                '}' => {
136                    if chars.peek() == Some(&'}') {
137                        chars.next(); // Skip escaped '}}'
138                    } else {
139                        return Err(ParseError::InvalidExpression); // Unmatched '}'
140                    }
141                }
142                _ => {}
143            }
144        }
145
146        Ok((placeholders, star_extras))
147    }
148}
149
#[cfg(test)]
mod tests {
    use super::*;
    use crate::script::ast::Expr;

    /// Table-driven check of `count_required_args`: each accepted format string
    /// is paired with its expected `(placeholders, star_extras)` result, and
    /// each malformed one must be rejected.
    #[test]
    fn test_count_placeholders() {
        let accepted: &[(&str, (usize, usize))] = &[
            // Basic cases
            ("hello world", (0, 0)),
            ("hello {}", (1, 0)),
            ("{} {}", (2, 0)),
            ("pid: {}, name: {}", (2, 0)),
            // Escape sequences
            ("use {{}} for braces", (0, 0)),
            ("value: {}, braces: {{}}", (1, 0)),
            // Extended specifiers
            ("{:x}", (1, 0)),
            ("{:X}", (1, 0)),
            ("{:p}", (1, 0)),
            ("{:s}", (1, 0)),
            ("{:x.16}", (1, 0)),
            ("{:s.*}", (1, 1)),
            ("{:x.len$}", (1, 0)),
            // Static length with hex/oct/bin
            ("{:x.0x10}", (1, 0)),
            ("{:s.0o20}", (1, 0)),
            ("{:X.0b1000}", (1, 0)),
        ];
        for (fmt, expected) in accepted {
            assert_eq!(
                FormatValidator::count_required_args(fmt).unwrap(),
                *expected,
                "format string: {:?}",
                fmt
            );
        }

        // Error cases
        let rejected = ["unclosed {", "unmatched }", "{:x.1a$}"];
        for fmt in rejected.iter() {
            assert!(
                FormatValidator::count_required_args(fmt).is_err(),
                "expected an error for {:?}",
                fmt
            );
        }
    }

    /// `validate_format_arguments` succeeds only when the argument count equals
    /// the number of arguments the format string requires.
    #[test]
    fn test_validate_format_arguments() {
        let no_args: Vec<Expr> = Vec::new();
        let one_arg = vec![Expr::Variable("pid".to_string())];
        let two_args = vec![
            Expr::Variable("pid".to_string()),
            Expr::String("test".to_string()),
        ];

        // (format string, arguments, whether validation should succeed)
        let cases: Vec<(&str, &[Expr], bool)> = vec![
            // Matching cases
            ("no placeholders", no_args.as_slice(), true),
            ("pid: {}", one_arg.as_slice(), true),
            ("pid: {}, name: {}", two_args.as_slice(), true),
            // Mismatched cases
            ("need one: {}", no_args.as_slice(), false),
            ("no placeholders", one_arg.as_slice(), false),
            ("need two: {} {}", one_arg.as_slice(), false),
        ];

        for (fmt, args, should_pass) in cases {
            let outcome = FormatValidator::validate_format_arguments(fmt, args);
            assert_eq!(
                outcome.is_ok(),
                should_pass,
                "format string: {:?} with {} argument(s)",
                fmt,
                args.len()
            );
        }
    }
}