Skip to main content

panache_parser/parser/utils/
chunk_options.rs

1//! Chunk option value classification for Quarto/RMarkdown code blocks.
2//!
3//! This module distinguishes between simple literal values (booleans, numbers, strings)
4//! and complex R expressions (function calls, variables, etc.) to determine which
5//! chunk options can be safely converted to hashpipe format.
6
/// Outcome of inspecting a chunk option value before hashpipe conversion.
///
/// Each variant carries the option value text it was built from.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ChunkOptionValue {
    /// A plain literal that is safe to rewrite in YAML syntax.
    ///
    /// Examples: `TRUE`, `FALSE`, `7`, `"string"`.
    Simple(String),

    /// An R expression that has to remain in the inline chunk header.
    ///
    /// Examples: `paste(...)`, `my_var`, `nrow(data)`.
    Expression(String),
}
18
/// Look up the hashpipe comment prefix used by a code chunk language.
///
/// The language name is compared case-insensitively (ASCII only). Returns
/// `None` when the language is not in the table below.
pub fn hashpipe_comment_prefix(language: &str) -> Option<&'static str> {
    // Each entry pairs a hashpipe prefix with the language names that use it.
    const PREFIXES: &[(&str, &[&str])] = &[
        (
            "#|",
            &["r", "python", "julia", "bash", "shell", "sh", "ruby", "perl"],
        ),
        (
            "//|",
            &[
                "c",
                "cpp",
                "c++",
                "java",
                "javascript",
                "js",
                "typescript",
                "ts",
                "rust",
                "go",
                "swift",
                "kotlin",
                "scala",
                "csharp",
                "c#",
                "php",
                "ojs",
                "dot",
            ],
        ),
        ("--|", &["sql", "mysql", "postgres", "postgresql", "sqlite"]),
        ("%%|", &["mermaid"]),
    ];

    let lowered = language.to_ascii_lowercase();
    let wanted = lowered.as_str();

    PREFIXES
        .iter()
        .find(|(_, languages)| languages.contains(&wanted))
        .map(|&(prefix, _)| prefix)
}
32
33/// Classify a chunk option value as either simple (convertible) or expression (skip).
34///
35/// Conservative approach: only classify as Simple if we're certain it's a literal.
36/// When in doubt, classify as Expression to avoid breaking R code.
37///
38/// **Note**: The parser strips quotes from values, so we receive the inner string.
39/// For `label="my chunk"`, value is `"my chunk"` (no quotes).
40pub fn classify_value(value: &Option<String>) -> ChunkOptionValue {
41    match value {
42        None => ChunkOptionValue::Simple(String::new()), // Bare flag like `echo` is treated as true
43        Some(v) => {
44            // Parser strips quotes, so we get the inner value
45            // Check if it looks like an R expression
46            if is_boolean_literal(v) || is_numeric_literal(v) || is_simple_string(v) {
47                ChunkOptionValue::Simple(v.clone())
48            } else {
49                ChunkOptionValue::Expression(v.clone())
50            }
51        }
52    }
53}
54
/// Decide whether an unquoted option value can be treated as a plain string.
///
/// The rules, in order:
/// - the empty string is simple;
/// - any value containing one of `( ) { } $ [ ] + - * / < > ! :` is not simple;
/// - a value made up only of alphanumeric characters and `_` is not simple,
///   because it could be a variable name;
/// - everything else (phrases with spaces, names with dots, ...) is simple.
fn is_simple_string(s: &str) -> bool {
    // Characters that mark the value as an R expression.
    const EXPRESSION_CHARS: &[char] = &[
        '(', ')', '{', '}', '$', '[', ']', '+', '-', '*', '/', '<', '>', '!', ':',
    ];

    let has_expression_char = s.contains(EXPRESSION_CHARS);
    // A lone bareword cannot be told apart from a variable reference.
    let is_bareword = s.chars().all(|ch| ch == '_' || ch.is_alphanumeric());

    s.is_empty() || !(has_expression_char || is_bareword)
}
100
/// Report whether `s` is exactly one of R's boolean constants.
///
/// Accepted spellings: `TRUE`, `FALSE`, `T`, `F`. The comparison is
/// case-sensitive, so `true` or `True` are rejected.
pub fn is_boolean_literal(s: &str) -> bool {
    const BOOLEANS: [&str; 4] = ["TRUE", "FALSE", "T", "F"];
    BOOLEANS.contains(&s)
}
107
/// Check if a string is a numeric literal.
///
/// Accepts: integers (`7`, `-3`), floats (`3.14`, `-2.5`) and scientific
/// notation (`1e5`, `1.5e-3`).
///
/// Rejects the textual specials that `f64`'s parser also understands
/// (`inf`, `infinity`, `nan`, in any letter case, with or without a sign).
/// Unquoted words such as `inf` or `nan` may be R identifiers, so they must
/// not be reported as numeric literals.
pub fn is_numeric_literal(s: &str) -> bool {
    // Every real number contains at least one digit; the textual specials
    // contain none, so this filters them out before parsing as f64.
    s.bytes().any(|b| b.is_ascii_digit()) && s.parse::<f64>().is_ok()
}
115
/// Report whether `s` is wrapped in a matching pair of quote characters.
///
/// Both `"..."` and `'...'` are accepted. The opening and closing quote must
/// be the same character and must be two separate characters, so a lone `"`
/// or `'` does not count. Escape sequences inside the string are not checked.
pub fn is_quoted_string(s: &str) -> bool {
    ['"', '\''].iter().any(|&quote| {
        // Removing the opening quote first guarantees that the closing quote
        // is a different character of the input.
        s.strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote))
            .is_some()
    })
}
124
#[cfg(test)]
mod tests {
    use super::*;

    /// Classify `raw` the way a parsed option value would be classified.
    fn classify(raw: &str) -> ChunkOptionValue {
        classify_value(&Some(raw.to_string()))
    }

    /// Expected result for a value that counts as a literal.
    fn simple(raw: &str) -> ChunkOptionValue {
        ChunkOptionValue::Simple(raw.to_string())
    }

    /// Expected result for a value that counts as an R expression.
    fn expression(raw: &str) -> ChunkOptionValue {
        ChunkOptionValue::Expression(raw.to_string())
    }

    #[test]
    fn test_is_boolean_literal() {
        for literal in &["TRUE", "FALSE", "T", "F"] {
            assert!(is_boolean_literal(literal), "{:?} should be accepted", literal);
        }

        for other in &["true", "false", "True", "MAYBE"] {
            assert!(!is_boolean_literal(other), "{:?} should be rejected", other);
        }
    }

    #[test]
    fn test_is_numeric_literal() {
        let numbers = [
            // Integers
            "7", "0", "-3", "100",
            // Floats
            "3.14", "-2.5", "0.1",
            // Scientific notation
            "1e5", "1.5e-3",
        ];
        for number in &numbers {
            assert!(is_numeric_literal(number), "{:?} should be numeric", number);
        }

        // Not numeric
        for other in &["abc", "7x", ""] {
            assert!(!is_numeric_literal(other), "{:?} should not be numeric", other);
        }
    }

    #[test]
    fn test_is_quoted_string() {
        let quoted = [
            // Double quotes
            "\"hello\"",
            "\"with spaces\"",
            "\"\"",
            // Single quotes
            "'hello'",
            "'with spaces'",
            "''",
        ];
        for text in &quoted {
            assert!(is_quoted_string(text), "{:?} should count as quoted", text);
        }

        // Unquoted, lone quote characters, mismatched quotes, empty input
        let unquoted = ["hello", "\"", "'", "\"hello'", "'hello\"", ""];
        for text in &unquoted {
            assert!(!is_quoted_string(text), "{:?} should not count as quoted", text);
        }
    }

    #[test]
    fn test_classify_boolean() {
        for literal in &["TRUE", "FALSE"] {
            assert_eq!(classify(literal), simple(literal));
        }
    }

    #[test]
    fn test_classify_number() {
        for literal in &["7", "3.14"] {
            assert_eq!(classify(literal), simple(literal));
        }
    }

    #[test]
    fn test_classify_quoted_string() {
        for literal in &["\"hello\"", "'world'"] {
            assert_eq!(classify(literal), simple(literal));
        }
    }

    #[test]
    fn test_classify_function_call() {
        let call = "paste(\"a\", \"b\")";
        assert_eq!(classify(call), expression(call));
    }

    #[test]
    fn test_classify_variable() {
        assert_eq!(classify("my_var"), expression("my_var"));
    }

    #[test]
    fn test_classify_none() {
        assert_eq!(classify_value(&None), simple(""));
    }

    #[test]
    fn test_classify_expression_with_operators() {
        for source in &["x + y", "data$col", "vec[1]"] {
            assert_eq!(classify(source), expression(source));
        }
    }

    #[test]
    fn test_hashpipe_comment_prefix() {
        let expectations = [
            ("r", Some("#|")),
            ("cpp", Some("//|")),
            ("sql", Some("--|")),
            ("mermaid", Some("%%|")),
            ("fortran", None),
        ];
        for &(language, prefix) in &expectations {
            assert_eq!(hashpipe_comment_prefix(language), prefix);
        }
    }
}