Skip to main content

symbi_runtime/toolclad/
executor.rs

1//! ToolClad executor — bridges ORGA loop to .clad.toml tool manifests
2//!
3//! Implements the `ActionExecutor` trait: receives tool calls, validates
4//! arguments, constructs commands from templates, executes, and returns
5//! structured JSON observations.
6//!
7//! Supports built-in output parsers (json, xml, csv, jsonl, text), custom
8//! external parsers, and output schema validation.
9
10use async_trait::async_trait;
11use std::collections::HashMap;
12use std::io::Write;
13use std::time::Duration;
14
15use super::manifest::Manifest;
16use super::validator;
17use crate::reasoning::circuit_breaker::CircuitBreakerRegistry;
18use crate::reasoning::executor::ActionExecutor;
19use crate::reasoning::inference::ToolDefinition;
20use crate::reasoning::loop_types::{LoopConfig, Observation, ProposedAction};
21
22use super::manifest::ArgDef;
23
24/// An executor that dispatches tool calls to ToolClad manifests.
25/// Handles all five backends: shell, HTTP, MCP proxy, session (PTY), browser (CDP).
26pub struct ToolCladExecutor {
27    manifests: HashMap<String, Manifest>,
28    tool_defs: Vec<ToolDefinition>,
29    custom_types: HashMap<String, ArgDef>,
30    /// Manifest versions recorded at construction time for hot-reload detection.
31    manifest_versions: HashMap<String, String>,
32    /// Session executor for interactive CLI tools.
33    session_executor: super::session_executor::SessionExecutor,
34    /// Browser executor for CDP-based browser sessions.
35    browser_executor: super::browser_executor::BrowserExecutor,
36}
37
38impl ToolCladExecutor {
39    /// Create an executor from a set of loaded manifests.
40    pub fn new(manifests: Vec<(String, Manifest)>) -> Self {
41        Self::with_custom_types(manifests, HashMap::new())
42    }
43
44    /// Create an executor with custom type definitions loaded from `toolclad.toml`.
45    pub fn with_custom_types(
46        manifests: Vec<(String, Manifest)>,
47        custom_types: HashMap<String, ArgDef>,
48    ) -> Self {
49        let tool_defs: Vec<ToolDefinition> = manifests
50            .iter()
51            .flat_map(|(_, m)| generate_tool_definitions(m))
52            .collect();
53        let manifest_versions: HashMap<String, String> = manifests
54            .iter()
55            .map(|(name, m)| (name.clone(), m.tool.version.clone()))
56            .collect();
57        // Create sub-executors for session and browser modes
58        let session_manifests: Vec<_> = manifests
59            .iter()
60            .filter(|(_, m)| m.tool.mode == "session")
61            .map(|(n, m)| (n.clone(), m.clone()))
62            .collect();
63        let browser_manifests: Vec<_> = manifests
64            .iter()
65            .filter(|(_, m)| m.tool.mode == "browser")
66            .map(|(n, m)| (n.clone(), m.clone()))
67            .collect();
68        let session_executor = super::session_executor::SessionExecutor::new(session_manifests);
69        let browser_executor = super::browser_executor::BrowserExecutor::new(browser_manifests);
70
71        let manifest_map: HashMap<String, Manifest> = manifests.into_iter().collect();
72        Self {
73            manifests: manifest_map,
74            tool_defs,
75            custom_types,
76            manifest_versions,
77            session_executor,
78            browser_executor,
79        }
80    }
81
82    /// Check if this executor handles a given tool name.
83    /// Matches both direct tool names and session/browser sub-commands
84    /// (e.g., "msfconsole_session" or "msfconsole_session.run").
85    pub fn handles(&self, tool_name: &str) -> bool {
86        if self.manifests.contains_key(tool_name) {
87            return true;
88        }
89        // Check session and browser executors
90        if self.session_executor.handles(tool_name) || self.browser_executor.handles(tool_name) {
91            return true;
92        }
93        // Check for session/browser sub-command pattern: "toolname.command"
94        if let Some(base) = tool_name.split('.').next() {
95            if let Some(m) = self.manifests.get(base) {
96                let cmd = tool_name
97                    .strip_prefix(base)
98                    .unwrap_or("")
99                    .trim_start_matches('.');
100                if let Some(session) = &m.session {
101                    return session.commands.contains_key(cmd);
102                }
103                if let Some(browser) = &m.browser {
104                    return browser.commands.contains_key(cmd);
105                }
106            }
107        }
108        false
109    }
110
111    /// Get tool definitions (convenience method that doesn't require importing ActionExecutor).
112    pub fn get_tool_definitions(&self) -> Vec<crate::reasoning::inference::ToolDefinition> {
113        self.tool_defs.clone()
114    }
115
116    /// Number of loaded manifests.
117    pub fn count(&self) -> usize {
118        self.manifests.len()
119    }
120
121    /// Execute a single tool call against a manifest.
122    pub fn execute_tool(&self, name: &str, args_json: &str) -> Result<serde_json::Value, String> {
123        let manifest = self
124            .manifests
125            .get(name)
126            .ok_or_else(|| format!("No ToolClad manifest for '{}'", name))?;
127
128        // Check manifest version against recorded version (hot-reload detection)
129        if let Some(recorded_version) = self.manifest_versions.get(name) {
130            if *recorded_version != manifest.tool.version {
131                return Err(format!(
132                    "Manifest version mismatch for '{}': executor was built with v{} but manifest \
133                     is now v{}. The tool definition may have changed — please re-plan.",
134                    name, recorded_version, manifest.tool.version
135                ));
136            }
137        }
138
139        // Parse arguments from JSON
140        let args: HashMap<String, serde_json::Value> = serde_json::from_str(args_json)
141            .map_err(|e| format!("Invalid arguments JSON: {}", e))?;
142
143        // Validate each argument against its definition
144        let mut validated: HashMap<String, String> = HashMap::new();
145        for (arg_name, arg_def) in &manifest.args {
146            let value = if let Some(v) = args.get(arg_name) {
147                match v {
148                    serde_json::Value::String(s) => s.clone(),
149                    other => other.to_string().trim_matches('"').to_string(),
150                }
151            } else if arg_def.required {
152                return Err(format!("Missing required argument: {}", arg_name));
153            } else if let Some(default) = &arg_def.default {
154                default.to_string().trim_matches('"').to_string()
155            } else {
156                String::new()
157            };
158
159            if !value.is_empty() {
160                let custom = if self.custom_types.is_empty() {
161                    None
162                } else {
163                    Some(&self.custom_types)
164                };
165                let cleaned = validator::validate_arg_with_custom(arg_def, &value, custom)
166                    .map_err(|e| format!("Validation failed for '{}': {}", arg_name, e))?;
167                validated.insert(arg_name.clone(), cleaned);
168            } else {
169                validated.insert(arg_name.clone(), value);
170            }
171        }
172
173        // Dispatch to appropriate backend
174        if manifest.http.is_some() {
175            return self.execute_http_backend(name, manifest, &validated);
176        }
177        if manifest.mcp.is_some() {
178            return self.execute_mcp_backend(name, manifest, &validated);
179        }
180
181        // Build command from template (shell backend)
182        let command = build_command(manifest, &validated)?;
183
184        // Execute with timeout — use direct argv to prevent shell injection
185        let _timeout = Duration::from_secs(manifest.tool.timeout_seconds);
186        let start = std::time::Instant::now();
187        let argv = split_command_to_argv(&command)?;
188        let (program, args) = argv
189            .split_first()
190            .ok_or_else(|| "Empty command after template interpolation".to_string())?;
191        let output = std::process::Command::new(program)
192            .args(args)
193            .output()
194            .map_err(|e| format!("Failed to execute '{}': {}", program, e))?;
195
196        let duration_ms = start.elapsed().as_millis() as u64;
197        let stdout = String::from_utf8_lossy(&output.stdout).to_string();
198        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
199
200        // Parse output using the manifest's format/parser configuration
201        let parsed = parse_output(manifest, stdout.trim())?;
202
203        // Validate parsed output against schema (warnings only, non-fatal)
204        let schema_warnings = validate_output_schema(&parsed, &manifest.output.schema);
205
206        // Build evidence envelope
207        let scan_id = format!(
208            "{}-{}",
209            chrono::Utc::now().timestamp(),
210            uuid::Uuid::new_v4().as_fields().0
211        );
212        let status = if output.status.success() {
213            "success"
214        } else {
215            "error"
216        };
217
218        // Hash output for evidence chain
219        use sha2::{Digest, Sha256};
220        let mut hasher = Sha256::new();
221        hasher.update(stdout.as_bytes());
222        let hash = format!("sha256:{}", hex::encode(hasher.finalize()));
223
224        let mut envelope = serde_json::json!({
225            "status": status,
226            "scan_id": scan_id,
227            "tool": name,
228            "command": command,
229            "duration_ms": duration_ms,
230            "timestamp": chrono::Utc::now().to_rfc3339(),
231            "output_hash": hash,
232            "results": parsed,
233        });
234
235        // Attach stderr and exit_code to the results
236        if let Some(obj) = envelope.as_object_mut() {
237            if let Some(results) = obj.get_mut("results").and_then(|r| r.as_object_mut()) {
238                if !stderr.is_empty() {
239                    results.insert(
240                        "stderr".to_string(),
241                        serde_json::Value::String(stderr.trim().to_string()),
242                    );
243                }
244                results.insert(
245                    "exit_code".to_string(),
246                    serde_json::json!(output.status.code()),
247                );
248            }
249        }
250
251        // Attach schema warnings if any
252        if !schema_warnings.is_empty() {
253            if let Some(obj) = envelope.as_object_mut() {
254                obj.insert(
255                    "schema_warnings".to_string(),
256                    serde_json::json!(schema_warnings),
257                );
258            }
259        }
260
261        Ok(envelope)
262    }
263
264    /// Execute an HTTP backend tool.
265    fn execute_http_backend(
266        &self,
267        name: &str,
268        manifest: &Manifest,
269        validated: &HashMap<String, String>,
270    ) -> Result<serde_json::Value, String> {
271        let http = manifest.http.as_ref().unwrap();
272
273        // Interpolate URL with args and secrets
274        let url = interpolate(&http.url, validated);
275        let url = super::template_vars::inject_secrets(&url)
276            .map_err(|e| format!("URL secret error: {}", e))?;
277
278        // SSRF protection: block private/internal IP ranges
279        reject_ssrf_url(&url)?;
280
281        // Interpolate headers with secrets
282        let mut headers = Vec::new();
283        for (key, val) in &http.headers {
284            let resolved = interpolate(val, validated);
285            let resolved = super::template_vars::inject_secrets(&resolved)
286                .map_err(|e| format!("Header secret error: {}", e))?;
287            headers.push((key.clone(), resolved));
288        }
289
290        // Interpolate body
291        let body = http
292            .body_template
293            .as_ref()
294            .map(|t| {
295                let b = interpolate(t, validated);
296                super::template_vars::inject_secrets(&b)
297            })
298            .transpose()
299            .map_err(|e| format!("Body secret error: {}", e))?;
300
301        // Execute HTTP request
302        let client = reqwest::blocking::Client::new();
303        let timeout = std::time::Duration::from_secs(manifest.tool.timeout_seconds);
304        let mut request = match http.method.to_uppercase().as_str() {
305            "GET" => client.get(&url),
306            "POST" => client.post(&url),
307            "PUT" => client.put(&url),
308            "DELETE" => client.delete(&url),
309            "PATCH" => client.patch(&url),
310            "HEAD" => client.head(&url),
311            other => return Err(format!("Unsupported HTTP method: {}", other)),
312        };
313
314        request = request.timeout(timeout);
315        for (key, val) in &headers {
316            request = request.header(key.as_str(), val.as_str());
317        }
318        if let Some(body_str) = &body {
319            request = request.body(body_str.clone());
320        }
321
322        let start = std::time::Instant::now();
323        let response = request
324            .send()
325            .map_err(|e| format!("HTTP request failed: {}", e))?;
326        let duration_ms = start.elapsed().as_millis() as u64;
327
328        let status_code = response.status().as_u16();
329        let response_body = response
330            .text()
331            .map_err(|e| format!("Failed to read response: {}", e))?;
332
333        let is_success = if !http.success_status.is_empty() {
334            http.success_status.contains(&status_code)
335        } else {
336            (200..300).contains(&status_code)
337        };
338
339        // Parse response
340        let parsed = parse_output(manifest, &response_body);
341        let results = parsed.unwrap_or_else(|_| serde_json::json!({"raw_output": response_body}));
342
343        let scan_id = format!(
344            "{}-{}",
345            chrono::Utc::now().timestamp(),
346            uuid::Uuid::new_v4().as_fields().0
347        );
348
349        use sha2::{Digest, Sha256};
350        let mut hasher = Sha256::new();
351        hasher.update(response_body.as_bytes());
352        let hash = format!("sha256:{}", hex::encode(hasher.finalize()));
353
354        Ok(serde_json::json!({
355            "status": if is_success { "success" } else { "error" },
356            "scan_id": scan_id,
357            "tool": name,
358            "http_method": http.method,
359            "http_url": url,
360            "http_status": status_code,
361            "duration_ms": duration_ms,
362            "timestamp": chrono::Utc::now().to_rfc3339(),
363            "output_hash": hash,
364            "exit_code": if is_success { 0 } else { status_code as i32 },
365            "stderr": "",
366            "results": results
367        }))
368    }
369
370    /// Execute an MCP proxy backend tool.
371    fn execute_mcp_backend(
372        &self,
373        name: &str,
374        manifest: &Manifest,
375        validated: &HashMap<String, String>,
376    ) -> Result<serde_json::Value, String> {
377        let mcp = manifest.mcp.as_ref().unwrap();
378
379        // Map validated args to upstream tool's expected format
380        let mut upstream_args = serde_json::Map::new();
381        for (local_name, value) in validated {
382            let upstream_name = mcp
383                .field_map
384                .get(local_name)
385                .cloned()
386                .unwrap_or_else(|| local_name.clone());
387            upstream_args.insert(upstream_name, serde_json::json!(value));
388        }
389
390        let scan_id = format!(
391            "{}-{}",
392            chrono::Utc::now().timestamp(),
393            uuid::Uuid::new_v4().as_fields().0
394        );
395
396        // Note: Full MCP execution requires the runtime's MCP transport.
397        // For now, build and return the request structure. The runtime's
398        // EnforcedActionExecutor will forward to the actual MCP server.
399        Ok(serde_json::json!({
400            "status": "delegated",
401            "scan_id": scan_id,
402            "tool": name,
403            "mcp_server": mcp.server,
404            "mcp_tool": mcp.tool,
405            "mcp_arguments": upstream_args,
406            "timestamp": chrono::Utc::now().to_rfc3339(),
407            "exit_code": 0,
408            "stderr": "",
409            "results": {
410                "delegated_to": format!("{}:{}", mcp.server, mcp.tool),
411                "arguments": upstream_args,
412            }
413        }))
414    }
415}
416
417#[async_trait]
418impl ActionExecutor for ToolCladExecutor {
419    async fn execute_actions(
420        &self,
421        actions: &[ProposedAction],
422        _config: &LoopConfig,
423        _circuit_breakers: &CircuitBreakerRegistry,
424    ) -> Vec<Observation> {
425        let mut observations = Vec::new();
426
427        for action in actions {
428            if let ProposedAction::ToolCall {
429                call_id,
430                name,
431                arguments,
432            } = action
433            {
434                if !self.handles(name) {
435                    continue; // Not a ToolClad tool — skip
436                }
437
438                // Dispatch to appropriate executor based on tool type
439                let result = if self.session_executor.handles(name) {
440                    self.session_executor
441                        .execute_session_command(name, arguments)
442                } else if self.browser_executor.handles(name) {
443                    self.browser_executor
444                        .execute_browser_command(name, arguments)
445                } else {
446                    self.execute_tool(name, arguments)
447                };
448
449                let (content, is_error) = match result {
450                    Ok(envelope) => (
451                        serde_json::to_string_pretty(&envelope).unwrap_or_default(),
452                        false,
453                    ),
454                    Err(e) => (format!("ToolClad error: {}", e), true),
455                };
456
457                observations.push(Observation {
458                    source: format!("toolclad:{}", name),
459                    content,
460                    is_error,
461                    call_id: Some(call_id.clone()),
462                    metadata: HashMap::new(),
463                });
464            }
465        }
466
467        observations
468    }
469
470    fn tool_definitions(&self) -> Vec<ToolDefinition> {
471        self.tool_defs.clone()
472    }
473}
474
475// ---- Output Parsing ----
476
477/// Parse raw tool output based on the manifest's `output.format` and `output.parser` fields.
478fn parse_output(manifest: &Manifest, raw_output: &str) -> Result<serde_json::Value, String> {
479    let default_parser = match manifest.output.format.as_str() {
480        "json" => "builtin:json",
481        "xml" => "builtin:xml",
482        "csv" => "builtin:csv",
483        "jsonl" => "builtin:jsonl",
484        _ => "builtin:text",
485    };
486    let parser = manifest.output.parser.as_deref().unwrap_or(default_parser);
487
488    match parser {
489        "builtin:json" => parse_json(raw_output),
490        "builtin:xml" => parse_xml(raw_output),
491        "builtin:csv" => parse_csv(raw_output),
492        "builtin:jsonl" => parse_jsonl(raw_output),
493        "builtin:text" => Ok(serde_json::json!({"raw_output": raw_output})),
494        custom => run_custom_parser(custom, raw_output),
495    }
496}
497
498/// Parse raw output as JSON.
499fn parse_json(raw_output: &str) -> Result<serde_json::Value, String> {
500    serde_json::from_str(raw_output).map_err(|e| format!("Failed to parse output as JSON: {}", e))
501}
502
503/// Parse raw output as XML (placeholder — wraps as string since full XML-to-JSON
504/// conversion would require a crate like `quick-xml`).
505fn parse_xml(raw_output: &str) -> Result<serde_json::Value, String> {
506    Ok(serde_json::json!({
507        "xml_output": raw_output,
508        "_note": "Basic XML wrapping; install quick-xml for full XML-to-JSON conversion"
509    }))
510}
511
512/// Parse raw output as CSV: first line is headers, subsequent lines are data rows.
513/// Returns an array of objects.
514fn parse_csv(raw_output: &str) -> Result<serde_json::Value, String> {
515    let mut lines = raw_output.lines();
516
517    let header_line = lines.next().ok_or("CSV output is empty — no header row")?;
518    let headers: Vec<&str> = header_line.split(',').map(|h| h.trim()).collect();
519
520    let mut rows = Vec::new();
521    for line in lines {
522        let line = line.trim();
523        if line.is_empty() {
524            continue;
525        }
526        let values: Vec<&str> = line.split(',').map(|v| v.trim()).collect();
527        let mut row = serde_json::Map::new();
528        for (i, header) in headers.iter().enumerate() {
529            let value = values.get(i).copied().unwrap_or("");
530            row.insert(
531                header.to_string(),
532                serde_json::Value::String(value.to_string()),
533            );
534        }
535        rows.push(serde_json::Value::Object(row));
536    }
537
538    Ok(serde_json::Value::Array(rows))
539}
540
541/// Parse raw output as JSON Lines: each line is a separate JSON value.
542/// Returns an array of parsed values.
543fn parse_jsonl(raw_output: &str) -> Result<serde_json::Value, String> {
544    let mut items = Vec::new();
545    for (i, line) in raw_output.lines().enumerate() {
546        let line = line.trim();
547        if line.is_empty() {
548            continue;
549        }
550        let value: serde_json::Value = serde_json::from_str(line)
551            .map_err(|e| format!("Failed to parse JSONL line {}: {}", i + 1, e))?;
552        items.push(value);
553    }
554    Ok(serde_json::Value::Array(items))
555}
556
557/// Run a custom external parser. Writes raw_output to a temp file, executes
558/// the parser binary with the temp file path as argv[1], and captures stdout
559/// as JSON.
560fn run_custom_parser(parser_path: &str, raw_output: &str) -> Result<serde_json::Value, String> {
561    let mut tmp = tempfile::NamedTempFile::new()
562        .map_err(|e| format!("Failed to create temp file for custom parser: {}", e))?;
563
564    tmp.write_all(raw_output.as_bytes())
565        .map_err(|e| format!("Failed to write to temp file: {}", e))?;
566
567    let tmp_path = tmp.path().to_string_lossy().to_string();
568
569    let output = std::process::Command::new(parser_path)
570        .arg(&tmp_path)
571        .output()
572        .map_err(|e| format!("Failed to execute custom parser '{}': {}", parser_path, e))?;
573
574    if !output.status.success() {
575        let stderr = String::from_utf8_lossy(&output.stderr);
576        return Err(format!(
577            "Custom parser '{}' exited with {}: {}",
578            parser_path,
579            output.status.code().unwrap_or(-1),
580            stderr.trim()
581        ));
582    }
583
584    let stdout = String::from_utf8_lossy(&output.stdout);
585    serde_json::from_str(stdout.trim()).map_err(|e| {
586        format!(
587            "Custom parser '{}' produced invalid JSON: {}",
588            parser_path, e
589        )
590    })
591}
592
593// ---- Output Schema Validation ----
594
595/// Validate parsed output against the manifest's output schema.
596/// Returns a list of warnings (never fails — partial results are OK).
597fn validate_output_schema(parsed: &serde_json::Value, schema: &serde_json::Value) -> Vec<String> {
598    let mut warnings = Vec::new();
599
600    // If schema has no properties defined, skip validation
601    let properties = match schema.get("properties").and_then(|p| p.as_object()) {
602        Some(props) => props,
603        None => return warnings,
604    };
605
606    // If parsed output is wrapped as raw_output, skip property checks
607    if parsed.get("raw_output").is_some() {
608        return warnings;
609    }
610
611    // Check required properties
612    let required: Vec<&str> = schema
613        .get("required")
614        .and_then(|r| r.as_array())
615        .map(|arr| arr.iter().filter_map(|v| v.as_str()).collect())
616        .unwrap_or_default();
617
618    for key in required {
619        if parsed.get(key).is_none() {
620            warnings.push(format!(
621                "Required property '{}' missing from parsed output",
622                key
623            ));
624        }
625    }
626
627    // Check declared properties exist and types match
628    for (key, prop_schema) in properties {
629        if let Some(value) = parsed.get(key) {
630            if let Some(expected_type) = prop_schema.get("type").and_then(|t| t.as_str()) {
631                let type_ok = match expected_type {
632                    "string" => value.is_string(),
633                    "number" => value.is_number(),
634                    "integer" => value.is_i64() || value.is_u64(),
635                    "boolean" => value.is_boolean(),
636                    "array" => value.is_array(),
637                    "object" => value.is_object(),
638                    "null" => value.is_null(),
639                    _ => true, // Unknown type — don't warn
640                };
641                if !type_ok {
642                    warnings.push(format!(
643                        "Property '{}' has type '{}' but expected '{}'",
644                        key,
645                        json_type_name(value),
646                        expected_type
647                    ));
648                }
649            }
650        }
651    }
652
653    warnings
654}
655
656/// Return a human-readable type name for a JSON value.
657fn json_type_name(value: &serde_json::Value) -> &'static str {
658    match value {
659        serde_json::Value::Null => "null",
660        serde_json::Value::Bool(_) => "boolean",
661        serde_json::Value::Number(_) => "number",
662        serde_json::Value::String(_) => "string",
663        serde_json::Value::Array(_) => "array",
664        serde_json::Value::Object(_) => "object",
665    }
666}
667
668/// Build a command string from a manifest template and validated arguments.
669fn build_command(manifest: &Manifest, args: &HashMap<String, String>) -> Result<String, String> {
670    let template = manifest
671        .command
672        .template
673        .as_ref()
674        .ok_or("No command template defined (and no custom executor)")?;
675
676    let mut result = template.clone();
677
678    // Apply defaults
679    for (key, val) in &manifest.command.defaults {
680        let placeholder = format!("{{{}}}", key);
681        if result.contains(&placeholder) && !args.contains_key(key) {
682            result = result.replace(&placeholder, val.to_string().trim_matches('"'));
683        }
684    }
685
686    // Apply mappings — e.g., scan_type -> _scan_flags
687    for (arg_name, mapping) in &manifest.command.mappings {
688        if let Some(arg_value) = args.get(arg_name) {
689            if let Some(flags) = mapping.get(arg_value) {
690                // Convention: _{arg_name}_flags or _scan_flags
691                let mapped_var = format!("{{_{}_flags}}", arg_name);
692                result = result.replace(&mapped_var, flags);
693                // Also try the generic _scan_flags pattern
694                result = result.replace("{_scan_flags}", flags);
695            }
696        }
697    }
698
699    // Apply conditionals
700    for (cond_name, cond_def) in &manifest.command.conditionals {
701        let placeholder = format!("{{_{}}}", cond_name);
702        if evaluate_condition(&cond_def.when, args) {
703            result = result.replace(&placeholder, &interpolate(&cond_def.template, args));
704        } else {
705            result = result.replace(&placeholder, "");
706        }
707    }
708
709    // Interpolate remaining arg placeholders
710    result = interpolate(&result, args);
711
712    // Auto-generated variables
713    let scan_id = format!("{}", chrono::Utc::now().timestamp());
714    result = result.replace("{_scan_id}", &scan_id);
715    result = result.replace("{_output_file}", "/dev/null");
716    result = result.replace("{_evidence_dir}", "/tmp/evidence");
717
718    // Clean up multiple spaces
719    let result = result.split_whitespace().collect::<Vec<_>>().join(" ");
720
721    Ok(result)
722}
723
/// Evaluate a manifest `when` expression against the supplied arguments.
///
/// Supported forms: `name != 'value'`, `name == 'value'` (quotes optional,
/// single or double), and several such clauses joined with ` and `. A
/// missing argument compares as the empty string. Anything else is false.
fn evaluate_condition(when: &str, args: &HashMap<String, String>) -> bool {
    /// Strip surrounding whitespace and quote characters from a literal.
    fn unquote(raw: &str) -> &str {
        raw.trim().trim_matches('\'').trim_matches('"')
    }

    let expr = when.trim();

    // Conjunction: every clause must hold.
    if expr.contains(" and ") {
        return expr
            .split(" and ")
            .all(|clause| evaluate_condition(clause, args));
    }

    // `!=` is looked for before `==`, splitting at its first occurrence.
    if let Some((lhs, rhs)) = expr.split_once("!=") {
        let actual = args.get(lhs.trim()).map(String::as_str).unwrap_or("");
        return actual != unquote(rhs);
    }

    if let Some((lhs, rhs)) = expr.split_once("==") {
        let actual = args.get(lhs.trim()).map(String::as_str).unwrap_or("");
        return actual == unquote(rhs);
    }

    false
}
753
754/// Reject URLs targeting private/internal IP ranges to prevent SSRF.
755fn reject_ssrf_url(url: &str) -> Result<(), String> {
756    let parsed = url::Url::parse(url).map_err(|e| format!("Invalid URL '{}': {}", url, e))?;
757
758    // Only allow http/https
759    if !matches!(parsed.scheme(), "http" | "https") {
760        return Err(format!(
761            "SSRF: only http/https schemes allowed, got '{}'",
762            parsed.scheme()
763        ));
764    }
765
766    if let Some(host) = parsed.host_str() {
767        // Block localhost variants
768        if host == "localhost" || host == "127.0.0.1" || host == "::1" || host == "[::1]" {
769            return Err("SSRF: cannot access localhost".to_string());
770        }
771
772        // Block cloud metadata endpoints
773        if host == "169.254.169.254" || host == "metadata.google.internal" {
774            return Err("SSRF: cannot access cloud metadata endpoint".to_string());
775        }
776
777        // Block private IP ranges
778        if let Ok(ip) = host.parse::<std::net::IpAddr>() {
779            let is_private = match ip {
780                std::net::IpAddr::V4(v4) => {
781                    v4.is_loopback()
782                        || v4.is_private()
783                        || v4.is_link_local()
784                        || v4.is_broadcast()
785                        || v4.is_unspecified()
786                }
787                std::net::IpAddr::V6(v6) => v6.is_loopback() || v6.is_unspecified(),
788            };
789            if is_private {
790                return Err(format!("SSRF: cannot access private IP range {}", ip));
791            }
792        }
793    }
794
795    Ok(())
796}
797
/// Split a command string into argv (program + arguments).
///
/// Handles single and double quoting so that arguments containing spaces
/// are preserved as a single element, and a backslash outside single quotes
/// makes the next character literal. Does NOT invoke a shell — this
/// prevents shell metacharacter injection.
///
/// An explicitly quoted empty string (`''` or `""`) produces an empty
/// argument instead of being dropped, so later arguments keep their
/// positions.
///
/// # Errors
/// Returns an error for an unterminated quote, or when there is no program
/// name (empty input, or an empty first token).
fn split_command_to_argv(command: &str) -> Result<Vec<String>, String> {
    let mut argv: Vec<String> = Vec::new();
    let mut current = String::new();
    // True once the current token has begun, even if it is still empty
    // (which is the case right after `''` or `""`).
    let mut token_started = false;
    let mut chars = command.chars();
    let mut in_single_quote = false;
    let mut in_double_quote = false;

    // `while let` rather than `for`: the escape arm consumes an extra char.
    while let Some(c) = chars.next() {
        match c {
            '\'' if !in_double_quote => {
                in_single_quote = !in_single_quote;
                token_started = true;
            }
            '"' if !in_single_quote => {
                in_double_quote = !in_double_quote;
                token_started = true;
            }
            '\\' if !in_single_quote => {
                // A trailing backslash with nothing after it is ignored.
                if let Some(next) = chars.next() {
                    current.push(next);
                    token_started = true;
                }
            }
            ' ' | '\t' if !in_single_quote && !in_double_quote => {
                if token_started {
                    argv.push(std::mem::take(&mut current));
                    token_started = false;
                }
            }
            _ => {
                current.push(c);
                token_started = true;
            }
        }
    }

    if in_single_quote || in_double_quote {
        return Err("Unterminated quote in command template".to_string());
    }
    if token_started {
        argv.push(current);
    }
    // An empty program name can never be executed; report it like empty input.
    if argv.first().map_or(true, |program| program.is_empty()) {
        return Err("Empty command after template interpolation".to_string());
    }
    Ok(argv)
}
838
/// Interpolate `{placeholder}` references in a string.
///
/// The template is scanned once, left to right. Each `{name}` whose `name`
/// is a key in `args` is replaced by the corresponding value; any other
/// brace sequence is copied through unchanged.
///
/// Substituted values are never re-scanned, so a value that itself contains
/// `{other}` is inserted literally. This keeps the result independent of
/// `HashMap` iteration order and prevents one argument value from pulling in
/// the value of another argument.
///
/// Placeholder names are matched up to the first `}` after the opening brace.
fn interpolate(template: &str, args: &HashMap<String, String>) -> String {
    let mut result = String::with_capacity(template.len());
    let mut rest = template;

    while let Some(open) = rest.find('{') {
        // Everything before the brace is literal text.
        result.push_str(&rest[..open]);
        let after_open = &rest[open + 1..];

        let substitution = after_open
            .find('}')
            .and_then(|close| args.get(&after_open[..close]).map(|value| (close, value)));

        match substitution {
            Some((close, value)) => {
                result.push_str(value);
                rest = &after_open[close + 1..];
            }
            None => {
                // Not a known placeholder: keep the brace and continue right
                // after it, so a later `{name}` inside this span still matches.
                result.push('{');
                rest = after_open;
            }
        }
    }

    result.push_str(rest);
    result
}
847
848/// Generate MCP-compatible ToolDefinitions from a manifest.
849/// Oneshot tools produce one definition. Session/browser tools produce
850/// one definition per declared command (e.g., "msfconsole_session.run").
851fn generate_tool_definitions(manifest: &Manifest) -> Vec<ToolDefinition> {
852    match manifest.tool.mode.as_str() {
853        "session" => generate_session_tool_defs(manifest),
854        "browser" => generate_browser_tool_defs(manifest),
855        _ => vec![generate_oneshot_tool_def(manifest)],
856    }
857}
858
859/// Generate tool definitions for session commands.
860fn generate_session_tool_defs(manifest: &Manifest) -> Vec<ToolDefinition> {
861    let session = match &manifest.session {
862        Some(s) => s,
863        None => return vec![generate_oneshot_tool_def(manifest)],
864    };
865    session
866        .commands
867        .iter()
868        .map(|(cmd_name, cmd_def)| {
869            let mut properties = serde_json::Map::new();
870            properties.insert(
871                "command".to_string(),
872                serde_json::json!({
873                    "type": "string",
874                    "description": format!("Command matching pattern: {}", cmd_def.pattern)
875                }),
876            );
877            for (arg_name, arg_def) in &cmd_def.args {
878                let mut prop = serde_json::Map::new();
879                prop.insert("type".to_string(), serde_json::json!("string"));
880                prop.insert(
881                    "description".to_string(),
882                    serde_json::json!(arg_def.description),
883                );
884                properties.insert(arg_name.clone(), serde_json::Value::Object(prop));
885            }
886            ToolDefinition {
887                name: format!("{}.{}", manifest.tool.name, cmd_name),
888                description: cmd_def.description.clone(),
889                parameters: serde_json::json!({
890                    "type": "object",
891                    "properties": properties,
892                    "required": ["command"]
893                }),
894            }
895        })
896        .collect()
897}
898
899/// Generate tool definitions for browser commands.
900fn generate_browser_tool_defs(manifest: &Manifest) -> Vec<ToolDefinition> {
901    let browser = match &manifest.browser {
902        Some(b) => b,
903        None => return vec![generate_oneshot_tool_def(manifest)],
904    };
905    browser
906        .commands
907        .iter()
908        .map(|(cmd_name, cmd_def)| {
909            let mut properties = serde_json::Map::new();
910            for (arg_name, arg_def) in &cmd_def.args {
911                let mut prop = serde_json::Map::new();
912                prop.insert("type".to_string(), serde_json::json!("string"));
913                prop.insert(
914                    "description".to_string(),
915                    serde_json::json!(arg_def.description),
916                );
917                if let Some(allowed) = &arg_def.allowed {
918                    prop.insert("enum".to_string(), serde_json::json!(allowed));
919                }
920                properties.insert(arg_name.clone(), serde_json::Value::Object(prop));
921            }
922            let required: Vec<_> = cmd_def
923                .args
924                .iter()
925                .filter(|(_, d)| d.required)
926                .map(|(n, _)| serde_json::json!(n))
927                .collect();
928            ToolDefinition {
929                name: format!("{}.{}", manifest.tool.name, cmd_name),
930                description: cmd_def.description.clone(),
931                parameters: serde_json::json!({
932                    "type": "object",
933                    "properties": properties,
934                    "required": required
935                }),
936            }
937        })
938        .collect()
939}
940
941/// Generate a single MCP tool definition for a oneshot manifest.
942fn generate_oneshot_tool_def(manifest: &Manifest) -> ToolDefinition {
943    let mut properties = serde_json::Map::new();
944    let mut required = Vec::new();
945
946    let mut sorted_args: Vec<_> = manifest.args.iter().collect();
947    sorted_args.sort_by_key(|(_, def)| def.position);
948
949    for (name, def) in &sorted_args {
950        let mut prop = serde_json::Map::new();
951        prop.insert("type".to_string(), serde_json::json!("string"));
952        prop.insert(
953            "description".to_string(),
954            serde_json::json!(def.description),
955        );
956        if let Some(allowed) = &def.allowed {
957            prop.insert("enum".to_string(), serde_json::json!(allowed));
958        }
959        if let Some(default) = &def.default {
960            prop.insert(
961                "default".to_string(),
962                serde_json::json!(default.to_string().trim_matches('"')),
963            );
964        }
965        properties.insert(name.to_string(), serde_json::Value::Object(prop));
966        if def.required {
967            required.push(serde_json::json!(name));
968        }
969    }
970
971    let parameters = serde_json::json!({
972        "type": "object",
973        "properties": properties,
974        "required": required
975    });
976
977    ToolDefinition {
978        name: manifest.tool.name.clone(),
979        description: manifest.tool.description.clone(),
980        parameters,
981    }
982}
983
/// Unit tests for command construction, tool-definition generation, output
/// parsing, output-schema validation, the MCP proxy backend, and the
/// command-splitting / interpolation / SSRF helpers.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_build_simple_command() {
        let manifest: Manifest = toml::from_str(
            r#"
[tool]
name = "echo_test"
version = "1.0.0"
binary = "echo"
description = "Test"

[args.message]
position = 1
required = true
type = "string"

[command]
template = "echo {message}"

[output]
format = "text"

[output.schema]
type = "object"
"#,
        )
        .unwrap();
        let mut args = HashMap::new();
        args.insert("message".to_string(), "hello".to_string());
        let cmd = build_command(&manifest, &args).unwrap();
        assert_eq!(cmd, "echo hello");
    }

    #[test]
    fn test_build_command_with_defaults() {
        let manifest: Manifest = toml::from_str(
            r#"
[tool]
name = "test"
version = "1.0.0"
binary = "test"
description = "Test"

[args.target]
position = 1
required = true
type = "string"

[command]
template = "scan --rate {rate} {target}"

[command.defaults]
rate = 100

[output]
format = "text"

[output.schema]
type = "object"
"#,
        )
        .unwrap();
        let mut args = HashMap::new();
        args.insert("target".to_string(), "example.com".to_string());
        let cmd = build_command(&manifest, &args).unwrap();
        assert_eq!(cmd, "scan --rate 100 example.com");
    }

    #[test]
    fn test_generate_oneshot_tool_def() {
        let manifest: Manifest = toml::from_str(
            r#"
[tool]
name = "whois"
version = "1.0.0"
binary = "whois"
description = "WHOIS lookup"

[args.target]
position = 1
required = true
type = "scope_target"
description = "Domain or IP"

[command]
template = "whois {target}"

[output]
format = "text"

[output.schema]
type = "object"
"#,
        )
        .unwrap();
        let td = generate_oneshot_tool_def(&manifest);
        assert_eq!(td.name, "whois");
        assert_eq!(td.description, "WHOIS lookup");
        let required = td.parameters["required"].as_array().unwrap();
        assert!(required.contains(&serde_json::json!("target")));
    }

    // ---- Parser Tests ----

    #[test]
    fn test_parse_json_valid() {
        let result = parse_json(r#"{"key": "value", "count": 42}"#).unwrap();
        assert_eq!(result["key"], "value");
        assert_eq!(result["count"], 42);
    }

    #[test]
    fn test_parse_json_invalid() {
        let result = parse_json("not json at all");
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_csv_basic() {
        let csv = "name,age,city\nAlice,30,NYC\nBob,25,LA";
        let result = parse_csv(csv).unwrap();
        let rows = result.as_array().unwrap();
        assert_eq!(rows.len(), 2);
        assert_eq!(rows[0]["name"], "Alice");
        assert_eq!(rows[0]["age"], "30");
        assert_eq!(rows[1]["city"], "LA");
    }

    #[test]
    fn test_parse_csv_empty_body() {
        let csv = "name,age";
        let result = parse_csv(csv).unwrap();
        let rows = result.as_array().unwrap();
        assert!(rows.is_empty());
    }

    #[test]
    fn test_parse_csv_no_header() {
        let result = parse_csv("");
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_jsonl_valid() {
        let jsonl = r#"{"a":1}
{"b":2}
{"c":3}"#;
        let result = parse_jsonl(jsonl).unwrap();
        let items = result.as_array().unwrap();
        assert_eq!(items.len(), 3);
        assert_eq!(items[0]["a"], 1);
        assert_eq!(items[2]["c"], 3);
    }

    #[test]
    fn test_parse_jsonl_with_blanks() {
        let jsonl = r#"{"a":1}

{"b":2}
"#;
        let result = parse_jsonl(jsonl).unwrap();
        let items = result.as_array().unwrap();
        assert_eq!(items.len(), 2);
    }

    #[test]
    fn test_parse_jsonl_invalid_line() {
        let jsonl = "{\"a\":1}\nnot json";
        let result = parse_jsonl(jsonl);
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("line 2"));
    }

    #[test]
    fn test_parse_xml_wraps() {
        let xml = "<root><item>hello</item></root>";
        let result = parse_xml(xml).unwrap();
        assert_eq!(result["xml_output"], xml);
        assert!(result.get("_note").is_some());
    }

    #[test]
    fn test_parse_output_default_text() {
        let manifest: Manifest = toml::from_str(
            r#"
[tool]
name = "test"
version = "1.0.0"
binary = "test"
description = "Test"

[command]
template = "test"

[output]
format = "text"

[output.schema]
type = "object"
"#,
        )
        .unwrap();
        let result = parse_output(&manifest, "hello world").unwrap();
        assert_eq!(result["raw_output"], "hello world");
    }

    #[test]
    fn test_parse_output_json_format() {
        let manifest: Manifest = toml::from_str(
            r#"
[tool]
name = "test"
version = "1.0.0"
binary = "test"
description = "Test"

[command]
template = "test"

[output]
format = "json"

[output.schema]
type = "object"
"#,
        )
        .unwrap();
        let result = parse_output(&manifest, r#"{"status":"ok"}"#).unwrap();
        assert_eq!(result["status"], "ok");
    }

    #[test]
    fn test_parse_output_explicit_parser() {
        let manifest: Manifest = toml::from_str(
            r#"
[tool]
name = "test"
version = "1.0.0"
binary = "test"
description = "Test"

[command]
template = "test"

[output]
format = "text"
parser = "builtin:csv"

[output.schema]
type = "object"
"#,
        )
        .unwrap();
        let result = parse_output(&manifest, "a,b\n1,2").unwrap();
        let rows = result.as_array().unwrap();
        assert_eq!(rows.len(), 1);
        assert_eq!(rows[0]["a"], "1");
    }

    // ---- Schema Validation Tests ----

    #[test]
    fn test_validate_schema_no_properties() {
        let parsed = serde_json::json!({"foo": "bar"});
        let schema = serde_json::json!({"type": "object"});
        let warnings = validate_output_schema(&parsed, &schema);
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_validate_schema_missing_required() {
        let parsed = serde_json::json!({"foo": "bar"});
        let schema = serde_json::json!({
            "type": "object",
            "required": ["missing_key"],
            "properties": {
                "missing_key": {"type": "string"}
            }
        });
        let warnings = validate_output_schema(&parsed, &schema);
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].contains("missing_key"));
    }

    #[test]
    fn test_validate_schema_type_mismatch() {
        let parsed = serde_json::json!({"count": "not_a_number"});
        let schema = serde_json::json!({
            "type": "object",
            "properties": {
                "count": {"type": "number"}
            }
        });
        let warnings = validate_output_schema(&parsed, &schema);
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].contains("count"));
        assert!(warnings[0].contains("number"));
    }

    #[test]
    fn test_validate_schema_raw_output_skips() {
        let parsed = serde_json::json!({"raw_output": "some text"});
        let schema = serde_json::json!({
            "type": "object",
            "required": ["specific_field"],
            "properties": {
                "specific_field": {"type": "string"}
            }
        });
        let warnings = validate_output_schema(&parsed, &schema);
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_validate_schema_all_types() {
        let parsed = serde_json::json!({
            "s": "hello",
            "n": 42,
            "b": true,
            "a": [1, 2],
            "o": {"nested": true}
        });
        let schema = serde_json::json!({
            "type": "object",
            "properties": {
                "s": {"type": "string"},
                "n": {"type": "number"},
                "b": {"type": "boolean"},
                "a": {"type": "array"},
                "o": {"type": "object"}
            }
        });
        let warnings = validate_output_schema(&parsed, &schema);
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_manifest_version_recorded() {
        let manifest: Manifest = toml::from_str(
            r#"
[tool]
name = "versioned"
version = "2.5.0"
binary = "echo"
description = "Test"

[command]
template = "echo test"

[output]
format = "text"

[output.schema]
type = "object"
"#,
        )
        .unwrap();
        let executor = ToolCladExecutor::new(vec![("versioned".to_string(), manifest)]);
        assert_eq!(
            executor.manifest_versions.get("versioned").unwrap(),
            "2.5.0"
        );
    }

    // ---- MCP Proxy Tests ----

    #[test]
    fn test_mcp_proxy_tool_def_generation() {
        let manifest: Manifest = toml::from_str(
            r#"
[tool]
name = "governed_search"
version = "1.0.0"
description = "Search via governed MCP proxy"

[args.query]
position = 1
required = true
type = "string"
description = "Search query"

[args.max_results]
position = 2
required = false
type = "integer"
description = "Maximum results to return"
default = 10

[mcp]
server = "brave-search"
tool = "brave_web_search"

[mcp.field_map]
query = "q"
max_results = "count"

[output]
format = "json"

[output.schema]
type = "object"
"#,
        )
        .unwrap();
        let td = generate_oneshot_tool_def(&manifest);
        assert_eq!(td.name, "governed_search");
        assert_eq!(td.description, "Search via governed MCP proxy");
        let props = td.parameters["properties"].as_object().unwrap();
        assert!(props.contains_key("query"));
        assert!(props.contains_key("max_results"));
        let required = td.parameters["required"].as_array().unwrap();
        assert!(required.contains(&serde_json::json!("query")));
    }

    #[test]
    fn test_mcp_proxy_execution_returns_delegated_envelope() {
        let manifest: Manifest = toml::from_str(
            r#"
[tool]
name = "governed_search"
version = "1.0.0"
description = "Search via governed MCP proxy"

[args.query]
position = 1
required = true
type = "string"
description = "Search query"

[mcp]
server = "brave-search"
tool = "brave_web_search"

[mcp.field_map]
query = "q"

[output]
format = "json"

[output.schema]
type = "object"
"#,
        )
        .unwrap();

        let executor =
            ToolCladExecutor::new(vec![("governed_search".to_string(), manifest.clone())]);

        let mut args = HashMap::new();
        args.insert("query".to_string(), "rust async".to_string());
        let result = executor
            .execute_mcp_backend("governed_search", &manifest, &args)
            .unwrap();

        assert_eq!(result["status"], "delegated");
        assert_eq!(result["tool"], "governed_search");
        assert_eq!(result["mcp_server"], "brave-search");
        assert_eq!(result["mcp_tool"], "brave_web_search");
        assert_eq!(result["exit_code"], 0);

        // Check that field mapping was applied
        let mcp_args = &result["mcp_arguments"];
        assert_eq!(mcp_args["q"], "rust async");
    }

    #[test]
    fn test_mcp_proxy_field_map_passthrough() {
        let manifest: Manifest = toml::from_str(
            r#"
[tool]
name = "passthrough"
version = "1.0.0"
description = "Direct passthrough"

[args.input]
position = 1
required = true
type = "string"
description = "Input value"

[mcp]
server = "my-server"
tool = "upstream_tool"

[output]
format = "json"

[output.schema]
type = "object"
"#,
        )
        .unwrap();

        let executor = ToolCladExecutor::new(vec![("passthrough".to_string(), manifest.clone())]);

        let mut args = HashMap::new();
        args.insert("input".to_string(), "hello".to_string());
        let result = executor
            .execute_mcp_backend("passthrough", &manifest, &args)
            .unwrap();

        // No field_map, so "input" stays as "input" in upstream args
        let mcp_args = &result["mcp_arguments"];
        assert_eq!(mcp_args["input"], "hello");
    }

    #[test]
    fn test_mcp_proxy_dispatch_via_execute_tool() {
        let manifest: Manifest = toml::from_str(
            r#"
[tool]
name = "mcp_tool"
version = "1.0.0"
description = "MCP proxy tool"

[args.query]
position = 1
required = true
type = "string"
description = "Query"

[mcp]
server = "test-server"
tool = "test_tool"

[output]
format = "json"

[output.schema]
type = "object"
"#,
        )
        .unwrap();

        let executor = ToolCladExecutor::new(vec![("mcp_tool".to_string(), manifest)]);

        let result = executor
            .execute_tool("mcp_tool", r#"{"query": "test"}"#)
            .unwrap();

        assert_eq!(result["status"], "delegated");
        assert_eq!(result["mcp_server"], "test-server");
        assert_eq!(result["mcp_tool"], "test_tool");
    }

    // ---- Command Splitting Tests ----

    #[test]
    fn test_split_command_plain_and_quoted() {
        assert_eq!(
            split_command_to_argv("echo hello  world").unwrap(),
            vec!["echo", "hello", "world"]
        );
        assert_eq!(
            split_command_to_argv(r#"echo "hello world" 'a b'"#).unwrap(),
            vec!["echo", "hello world", "a b"]
        );
        assert_eq!(
            split_command_to_argv(r"echo a\ b").unwrap(),
            vec!["echo", "a b"]
        );
    }

    #[test]
    fn test_split_command_keeps_metacharacters_literal() {
        // No shell is involved, so `;` and `|` are ordinary argument text.
        assert_eq!(
            split_command_to_argv("echo a;rm -rf / | cat").unwrap(),
            vec!["echo", "a;rm", "-rf", "/", "|", "cat"]
        );
    }

    #[test]
    fn test_split_command_rejects_bad_input() {
        assert!(split_command_to_argv("echo 'unterminated").is_err());
        assert!(split_command_to_argv("echo \"unterminated").is_err());
        assert!(split_command_to_argv("   ").is_err());
        assert!(split_command_to_argv("").is_err());
    }

    // ---- Interpolation Tests ----

    #[test]
    fn test_interpolate_replaces_known_placeholders() {
        let mut args = HashMap::new();
        args.insert("port".to_string(), "80".to_string());
        args.insert("target".to_string(), "example.com".to_string());
        assert_eq!(
            interpolate("nmap -p {port} {target} {port}", &args),
            "nmap -p 80 example.com 80"
        );
    }

    #[test]
    fn test_interpolate_leaves_unknown_placeholders() {
        let args = HashMap::new();
        assert_eq!(
            interpolate("keep {unknown} as-is", &args),
            "keep {unknown} as-is"
        );
    }

    // ---- SSRF Guard Tests ----

    #[test]
    fn test_reject_ssrf_url_allows_public_http() {
        assert!(reject_ssrf_url("https://example.com/path?q=1").is_ok());
        assert!(reject_ssrf_url("http://93.184.216.34/").is_ok());
    }

    #[test]
    fn test_reject_ssrf_url_blocks_internal_targets() {
        assert!(reject_ssrf_url("ftp://example.com/").is_err());
        assert!(reject_ssrf_url("http://localhost:8080/").is_err());
        assert!(reject_ssrf_url("http://127.0.0.1/").is_err());
        assert!(reject_ssrf_url("http://[::1]/").is_err());
        assert!(reject_ssrf_url("http://10.1.2.3/").is_err());
        assert!(reject_ssrf_url("http://192.168.0.1/").is_err());
        assert!(reject_ssrf_url("http://169.254.169.254/latest/meta-data/").is_err());
        assert!(reject_ssrf_url("not a url").is_err());
    }
}