Skip to main content

thulp_skill_files/
preprocessor.rs

//! Preprocessor for skill content.
//!
//! Handles substitution of:
//! - `$ARGUMENTS` - replaced with the skill invocation arguments
//! - `` !`command` `` - replaced with the shell command's output
//! - `{{variable}}` - replaced with context values
//! - `${ENV_VAR}` - replaced with environment variable values
8
9use crate::error::{Result, SkillFileError};
10use regex::Regex;
11use std::collections::HashMap;
12
/// Configuration for expanding placeholders in skill content.
///
/// The flags select which substitution stages run; see
/// `SkillPreprocessor::preprocess` for the stages themselves.
#[derive(Clone, Debug)]
pub struct SkillPreprocessor {
    /// When `true`, `` !`command` `` blocks are executed through the system shell.
    pub enable_commands: bool,
    /// Timeout for shell commands, expressed in seconds.
    pub command_timeout: u64,
    /// When `true`, `${ENV_VAR}` placeholders are expanded from the process environment.
    pub enable_env_vars: bool,
}
23
24impl Default for SkillPreprocessor {
25    fn default() -> Self {
26        Self {
27            enable_commands: true,
28            command_timeout: 30,
29            enable_env_vars: true,
30        }
31    }
32}
33
34impl SkillPreprocessor {
35    /// Create a new preprocessor with default settings.
36    pub fn new() -> Self {
37        Self::default()
38    }
39
40    /// Create a preprocessor with commands disabled (safe mode).
41    pub fn safe() -> Self {
42        Self {
43            enable_commands: false,
44            enable_env_vars: false,
45            ..Default::default()
46        }
47    }
48
49    /// Preprocess skill content with arguments and commands.
50    ///
51    /// Processing order:
52    /// 1. `$ARGUMENTS` substitution
53    /// 2. `!`command`` execution (if enabled)
54    /// 3. `{{variable}}` context substitution
55    /// 4. `${ENV_VAR}` environment substitution (if enabled)
56    pub fn preprocess(
57        &self,
58        content: &str,
59        arguments: &str,
60        context: &HashMap<String, serde_json::Value>,
61    ) -> Result<String> {
62        let mut result = content.to_string();
63
64        // Step 1: Replace $ARGUMENTS
65        result = self.substitute_arguments(&result, arguments);
66
67        // Step 2: Execute !`command` blocks
68        if self.enable_commands {
69            result = self.execute_commands(&result)?;
70        }
71
72        // Step 3: Replace {{variable}} placeholders from context
73        result = self.substitute_variables(&result, context)?;
74
75        // Step 4: Replace ${ENV_VAR} placeholders
76        if self.enable_env_vars {
77            result = self.substitute_env_vars(&result);
78        }
79
80        Ok(result)
81    }
82
83    /// Substitute $ARGUMENTS placeholder.
84    fn substitute_arguments(&self, content: &str, arguments: &str) -> String {
85        content.replace("$ARGUMENTS", arguments)
86    }
87
88    /// Execute !`command` blocks and replace with output.
89    fn execute_commands(&self, content: &str) -> Result<String> {
90        let re = Regex::new(r"!`([^`]+)`")?;
91        let mut result = content.to_string();
92        let mut errors = Vec::new();
93
94        // Collect all matches first to avoid borrowing issues
95        let matches: Vec<_> = re
96            .captures_iter(content)
97            .map(|cap| {
98                (
99                    cap.get(0).unwrap().as_str().to_string(),
100                    cap.get(1).unwrap().as_str().to_string(),
101                )
102            })
103            .collect();
104
105        for (full_match, command) in matches {
106            match self.run_shell_command(&command) {
107                Ok(output) => {
108                    result = result.replace(&full_match, &output);
109                }
110                Err(e) => {
111                    errors.push(format!("Command '{}': {}", command, e));
112                }
113            }
114        }
115
116        if !errors.is_empty() {
117            return Err(SkillFileError::CommandExecution(errors.join("; ")));
118        }
119
120        Ok(result)
121    }
122
123    /// Run a shell command and return its output.
124    fn run_shell_command(&self, command: &str) -> Result<String> {
125        #[cfg(target_os = "windows")]
126        let output = std::process::Command::new("cmd")
127            .args(["/C", command])
128            .output()
129            .map_err(|e| SkillFileError::CommandExecution(e.to_string()))?;
130
131        #[cfg(not(target_os = "windows"))]
132        let output = std::process::Command::new("sh")
133            .arg("-c")
134            .arg(command)
135            .output()
136            .map_err(|e| SkillFileError::CommandExecution(e.to_string()))?;
137
138        if !output.status.success() {
139            let stderr = String::from_utf8_lossy(&output.stderr);
140            return Err(SkillFileError::CommandExecution(format!(
141                "Command failed: {}",
142                stderr
143            )));
144        }
145
146        Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
147    }
148
149    /// Substitute {{variable}} placeholders from context.
150    fn substitute_variables(
151        &self,
152        content: &str,
153        context: &HashMap<String, serde_json::Value>,
154    ) -> Result<String> {
155        let re = Regex::new(r"\{\{([^}]+)\}\}")?;
156        let mut result = content.to_string();
157
158        // Collect all matches first
159        let matches: Vec<_> = re
160            .captures_iter(content)
161            .map(|cap| {
162                (
163                    cap.get(0).unwrap().as_str().to_string(),
164                    cap.get(1).unwrap().as_str().trim().to_string(),
165                )
166            })
167            .collect();
168
169        for (full_match, var_path) in matches {
170            if let Some(value) = self.resolve_path(&var_path, context) {
171                let value_str = match value {
172                    serde_json::Value::String(s) => s.clone(),
173                    serde_json::Value::Null => String::new(),
174                    other => serde_json::to_string(other).unwrap_or_default(),
175                };
176                result = result.replace(&full_match, &value_str);
177            }
178            // Leave unresolved variables as-is (they might be for later processing)
179        }
180
181        Ok(result)
182    }
183
184    /// Resolve a dotted path like "step_name.output.field".
185    fn resolve_path<'a>(
186        &self,
187        path: &str,
188        context: &'a HashMap<String, serde_json::Value>,
189    ) -> Option<&'a serde_json::Value> {
190        let parts: Vec<&str> = path.split('.').collect();
191
192        if parts.is_empty() {
193            return None;
194        }
195
196        let mut current = context.get(parts[0])?;
197
198        for part in &parts[1..] {
199            current = current.get(*part)?;
200        }
201
202        Some(current)
203    }
204
205    /// Substitute ${ENV_VAR} environment variables.
206    fn substitute_env_vars(&self, content: &str) -> String {
207        let re = Regex::new(r"\$\{([A-Z_][A-Z0-9_]*)\}").unwrap();
208
209        re.replace_all(content, |caps: &regex::Captures| {
210            let var_name = caps.get(1).unwrap().as_str();
211            std::env::var(var_name).unwrap_or_default()
212        })
213        .to_string()
214    }
215}
216
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    type Context = HashMap<String, serde_json::Value>;

    /// Build a substitution context from `(key, value)` pairs.
    fn context_of(entries: &[(&str, serde_json::Value)]) -> Context {
        entries
            .iter()
            .map(|(key, value)| (key.to_string(), value.clone()))
            .collect()
    }

    #[test]
    fn test_substitute_arguments() {
        let rendered =
            SkillPreprocessor::new().substitute_arguments("Process: $ARGUMENTS", "file.txt");
        assert_eq!(rendered, "Process: file.txt");
    }

    #[test]
    fn test_substitute_arguments_multiple() {
        let rendered = SkillPreprocessor::new()
            .substitute_arguments("First: $ARGUMENTS, Second: $ARGUMENTS", "hello");
        assert_eq!(rendered, "First: hello, Second: hello");
    }

    #[test]
    fn test_substitute_variables_simple() {
        let context = context_of(&[("name", json!("Alice"))]);

        let rendered = SkillPreprocessor::new()
            .substitute_variables("Hello, {{name}}!", &context)
            .unwrap();
        assert_eq!(rendered, "Hello, Alice!");
    }

    #[test]
    fn test_substitute_variables_nested() {
        let context = context_of(&[("user", json!({"name": "Bob", "age": 30}))]);

        let rendered = SkillPreprocessor::new()
            .substitute_variables("Name: {{user.name}}, Age: {{user.age}}", &context)
            .unwrap();
        assert_eq!(rendered, "Name: Bob, Age: 30");
    }

    #[test]
    fn test_substitute_variables_missing() {
        let context = context_of(&[]);

        let rendered = SkillPreprocessor::new()
            .substitute_variables("Hello, {{missing}}!", &context)
            .unwrap();
        // A placeholder with no matching context entry stays untouched.
        assert_eq!(rendered, "Hello, {{missing}}!");
    }

    #[test]
    fn test_substitute_env_vars() {
        let preprocessor = SkillPreprocessor::new();
        std::env::set_var("TEST_SKILL_VAR", "test_value");

        let rendered = preprocessor.substitute_env_vars("Value: ${TEST_SKILL_VAR}");
        assert_eq!(rendered, "Value: test_value");

        // Clean up so the variable does not leak into other tests.
        std::env::remove_var("TEST_SKILL_VAR");
    }

    #[test]
    fn test_preprocess_combined() {
        // Safe mode keeps shell commands out of the test run.
        let preprocessor = SkillPreprocessor::safe();
        let context = context_of(&[("project", json!("myapp"))]);

        let rendered = preprocessor
            .preprocess(
                "Project: {{project}}\nArgs: $ARGUMENTS",
                "build --release",
                &context,
            )
            .unwrap();

        assert_eq!(rendered, "Project: myapp\nArgs: build --release");
    }

    #[test]
    fn test_command_execution_disabled() {
        let preprocessor = SkillPreprocessor::safe();
        let context = context_of(&[]);

        // With commands disabled the block must pass through verbatim.
        let rendered = preprocessor
            .preprocess("Output: !`echo hello`", "", &context)
            .unwrap();
        assert_eq!(rendered, "Output: !`echo hello`");
    }
}
305}