Skip to main content

symbi_runtime/toolclad/
manifest.rs

1//! ToolClad manifest parsing
2//!
3//! Parses `.clad.toml` files into typed `Manifest` structs.
4
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7use std::path::Path;
8
/// HTTP backend configuration for oneshot API tools.
///
/// Deserialized from the manifest's optional `http` table (see `Manifest::http`).
/// Nothing in this type validates the values; they are stored as written.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HttpDef {
    /// HTTP method, stored as a free-form string (required).
    pub method: String,
    /// Request URL, stored as a free-form string (required).
    pub url: String,
    /// Header name/value pairs; an empty map when the key is omitted.
    #[serde(default)]
    pub headers: HashMap<String, String>,
    /// Optional request body template; `None` when omitted.
    /// NOTE(review): the placeholder syntax is not defined in this file — confirm with the executor.
    pub body_template: Option<String>,
    /// Status codes listed under `success_status`; empty when omitted.
    /// NOTE(review): presumably the codes treated as success — confirm how an empty list is interpreted.
    #[serde(default)]
    pub success_status: Vec<u16>,
    /// Status codes listed under `error_status`; empty when omitted.
    /// NOTE(review): presumably the codes treated as failure — confirm how an empty list is interpreted.
    #[serde(default)]
    pub error_status: Vec<u16>,
}
22
/// MCP proxy backend configuration for governed MCP tool passthrough.
///
/// Deserialized from the manifest's optional `[mcp]` table (see `Manifest::mcp`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct McpProxyDef {
    /// Named MCP server connection (from symbiont.toml). Required.
    pub server: String,
    /// Upstream MCP tool name to invoke. Required.
    pub tool: String,
    /// Field mapping from manifest args to upstream tool args.
    ///
    /// Keys are manifest argument names and values are the upstream names
    /// (e.g. `query = "q"`). Empty when `[mcp.field_map]` is omitted.
    #[serde(default)]
    pub field_map: HashMap<String, String>,
}
34
/// A parsed ToolClad manifest.
///
/// This is the root type deserialized from a `.clad.toml` file; each field
/// corresponds to the top-level TOML table of the same name.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Manifest {
    /// Tool metadata from the `[tool]` table (required).
    pub tool: ToolMeta,
    /// Argument definitions keyed by argument name (`[args.<name>]`);
    /// an empty map when no arguments are declared.
    #[serde(default)]
    pub args: HashMap<String, ArgDef>,
    /// Command construction settings from `[command]`; falls back to
    /// `CommandDef::default()` when the table is omitted.
    #[serde(default)]
    pub command: CommandDef,
    /// Output configuration from the `[output]` table (required).
    pub output: OutputDef,
    /// HTTP backend configuration (oneshot API tools).
    pub http: Option<HttpDef>,
    /// MCP proxy backend (for governed MCP tool passthrough).
    pub mcp: Option<McpProxyDef>,
    /// Session mode configuration (for interactive CLI tools).
    pub session: Option<SessionDef>,
    /// Browser mode configuration (for headless browser sessions).
    pub browser: Option<BrowserDef>,
}
53
/// Tool metadata.
///
/// Deserialized from the manifest's `[tool]` table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolMeta {
    /// Tool name (required). `load_manifests_from_dir` uses it as the key
    /// paired with each loaded manifest.
    pub name: String,
    /// Tool version string (required); not parsed or validated here.
    pub version: String,
    /// Binary associated with the tool; an empty string when omitted.
    #[serde(default)]
    pub binary: String,
    /// Human-readable description (required).
    pub description: String,
    /// Execution mode: "oneshot" (default), "session", or "browser".
    #[serde(default = "default_mode")]
    pub mode: String,
    /// Timeout in seconds; defaults to 30 when omitted.
    #[serde(default = "default_timeout")]
    pub timeout_seconds: u64,
    /// Risk tier label; defaults to "low" when omitted.
    /// NOTE(review): the set of accepted tier names is not defined in this file.
    #[serde(default = "default_risk_tier")]
    pub risk_tier: String,
    /// Human-approval flag; defaults to `false` when omitted.
    #[serde(default)]
    pub human_approval: bool,
    /// Optional Cedar policy metadata from `[tool.cedar]`.
    pub cedar: Option<CedarMeta>,
    /// Optional evidence capture settings.
    pub evidence: Option<EvidenceMeta>,
}
74
/// Serde default for `ToolMeta::mode`: the `"oneshot"` execution mode.
fn default_mode() -> String {
    String::from("oneshot")
}
78
/// Serde default, in seconds, shared by `ToolMeta::timeout_seconds` and the
/// `startup_timeout_seconds` fields of `SessionDef` and `BrowserDef`.
fn default_timeout() -> u64 {
    const SECONDS: u64 = 30;
    SECONDS
}
/// Serde default for every `risk_tier` field in this module: `"low"`.
fn default_risk_tier() -> String {
    String::from("low")
}
85
/// Cedar policy metadata.
///
/// Deserialized from the optional `[tool.cedar]` table; both keys are required
/// when the table is present.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CedarMeta {
    /// Cedar resource identifier, e.g. `"Tool::Search"`.
    pub resource: String,
    /// Cedar action name, e.g. `"execute_search"`.
    pub action: String,
}
92
/// Evidence capture configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvidenceMeta {
    /// Output directory for captured evidence (required).
    /// NOTE(review): whether relative paths are resolved, and against what, is not shown here.
    pub output_dir: String,
    /// Whether capture is enabled; defaults to `true` when omitted.
    #[serde(default = "default_true")]
    pub capture: bool,
    /// Hash algorithm name; defaults to "sha256" when omitted.
    #[serde(default = "default_hash")]
    pub hash: String,
}
102
/// Serde default for boolean fields that are enabled unless the manifest
/// turns them off (`EvidenceMeta::capture`, `OutputDef::envelope`,
/// `BrowserDef::headless`).
fn default_true() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
/// Serde default for `EvidenceMeta::hash`: `"sha256"`.
fn default_hash() -> String {
    String::from("sha256")
}
109
/// Argument definition.
///
/// Used for manifest-level `[args.<name>]` tables, for the per-command `args`
/// of session and browser commands, and as the value type returned by
/// `load_custom_types`. This type only carries the declared constraints; it
/// performs no validation itself.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArgDef {
    /// Declared position of the argument (required when deserialized).
    pub position: u32,
    /// Whether the argument is required; defaults to `false`.
    #[serde(default)]
    pub required: bool,
    /// Type name, read from the TOML key `type` (e.g. "string", "enum", "integer").
    #[serde(rename = "type")]
    pub type_name: String,
    /// Human-readable description; an empty string when omitted.
    #[serde(default)]
    pub description: String,
    /// Optional list of allowed values (e.g. for `type = "enum"`).
    pub allowed: Option<Vec<String>>,
    /// Optional default value, kept as a raw TOML value.
    pub default: Option<toml::Value>,
    /// Optional pattern string.
    /// NOTE(review): presumably a regex the value must match — confirm with the validator.
    pub pattern: Option<String>,
    /// Optional list of sanitizer names.
    /// NOTE(review): the recognised names are not defined in this file.
    pub sanitize: Option<Vec<String>>,
    /// Optional lower bound.
    pub min: Option<i64>,
    /// Optional upper bound.
    pub max: Option<i64>,
    /// Clamp flag; defaults to `false`.
    /// NOTE(review): presumably clamps out-of-range values to `min`/`max` instead of rejecting — confirm.
    #[serde(default)]
    pub clamp: bool,
    /// Optional list of schemes.
    /// NOTE(review): presumably allowed URL schemes — confirm with the validator.
    pub schemes: Option<Vec<String>>,
    /// Scope-check flag; defaults to `false`.
    #[serde(default)]
    pub scope_check: bool,
}
132
/// Command construction definition (oneshot mode).
///
/// Deserialized from the manifest's `[command]` table. Derives `Default`, so a
/// manifest without that table gets no template, no executor, and empty maps.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct CommandDef {
    /// Optional command template with `{name}` placeholders,
    /// e.g. `"echo {message}"`.
    pub template: Option<String>,
    /// Optional executor name.
    /// NOTE(review): how an executor relates to `template` is not shown in this file.
    pub executor: Option<String>,
    /// Default values keyed by name, kept as raw TOML values; empty when omitted.
    #[serde(default)]
    pub defaults: HashMap<String, toml::Value>,
    /// Per-argument value mappings from `[command.mappings.<arg>]`: the outer
    /// key is the argument name, the inner map takes an argument value to its
    /// replacement text (e.g. `scan_type`: `ping` -> `"-sn"`). Empty when omitted.
    #[serde(default)]
    pub mappings: HashMap<String, HashMap<String, String>>,
    /// Conditional command fragments keyed by name; empty when omitted.
    #[serde(default)]
    pub conditionals: HashMap<String, ConditionalDef>,
}
145
/// Conditional command fragment.
///
/// Stored under `CommandDef::conditionals`; both keys are required.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConditionalDef {
    /// Condition expression, kept as a string.
    /// NOTE(review): the expression syntax is not defined in this file.
    pub when: String,
    /// Template text for this fragment.
    /// NOTE(review): presumably emitted only when `when` holds — confirm with the executor.
    pub template: String,
}
152
/// Output configuration.
///
/// Deserialized from the manifest's required `[output]` table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutputDef {
    /// Output format name (required), e.g. "text" or "json".
    pub format: String,
    /// Optional parser name.
    pub parser: Option<String>,
    /// Envelope flag; defaults to `true` when omitted.
    /// NOTE(review): presumably wraps results in a standard envelope — confirm with the executor.
    #[serde(default = "default_true")]
    pub envelope: bool,
    /// Output schema from `[output.schema]`, kept as an arbitrary JSON value;
    /// takes `serde_json::Value`'s default when omitted.
    #[serde(default)]
    pub schema: serde_json::Value,
}
163
164// ---- Session Mode Types ----
165
/// Session mode configuration for interactive CLI tools.
///
/// Deserialized from the manifest's optional `session` table
/// (see `Manifest::session`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SessionDef {
    /// Command to start the tool process.
    pub startup_command: String,
    /// Regex pattern matching the tool's ready prompt.
    pub ready_pattern: String,
    /// Startup timeout in seconds; defaults to 30.
    #[serde(default = "default_timeout")]
    pub startup_timeout_seconds: u64,
    /// Idle timeout in seconds; defaults to 300.
    #[serde(default = "default_session_idle")]
    pub idle_timeout_seconds: u64,
    /// Overall session timeout in seconds; defaults to 1800.
    #[serde(default = "default_session_timeout")]
    pub session_timeout_seconds: u64,
    /// Maximum number of interactions; defaults to 100.
    #[serde(default = "default_max_interactions")]
    pub max_interactions: u32,
    /// Per-interaction settings.
    pub interaction: Option<SessionInteractionDef>,
    /// Allowed session commands (the allow-list), keyed by command name;
    /// empty when omitted.
    #[serde(default)]
    pub commands: HashMap<String, SessionCommandDef>,
}
187
/// Serde default, in seconds, for the `idle_timeout_seconds` fields of
/// `SessionDef` and `BrowserDef`.
fn default_session_idle() -> u64 {
    const SECONDS: u64 = 300;
    SECONDS
}
/// Serde default, in seconds, for the `session_timeout_seconds` fields of
/// `SessionDef` and `BrowserDef`.
fn default_session_timeout() -> u64 {
    const SECONDS: u64 = 1800;
    SECONDS
}
/// Serde default for the `max_interactions` fields of `SessionDef` and
/// `BrowserDef`.
fn default_max_interactions() -> u32 {
    const LIMIT: u32 = 100;
    LIMIT
}
197
/// Session interaction configuration.
///
/// Stored under `SessionDef::interaction`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SessionInteractionDef {
    /// Sanitizer names applied to input; empty when omitted.
    /// NOTE(review): the recognised names are not defined in this file.
    #[serde(default)]
    pub input_sanitize: Vec<String>,
    /// Output size limit in bytes; defaults to 1_048_576.
    #[serde(default = "default_output_max")]
    pub output_max_bytes: u64,
    /// Output wait in milliseconds; defaults to 2000.
    #[serde(default = "default_output_wait")]
    pub output_wait_ms: u64,
}
208
/// Serde default for `SessionInteractionDef::output_max_bytes`.
fn default_output_max() -> u64 {
    const BYTES: u64 = 1_048_576;
    BYTES
}
/// Serde default, in milliseconds, for `SessionInteractionDef::output_wait_ms`.
fn default_output_wait() -> u64 {
    const MILLIS: u64 = 2000;
    MILLIS
}
215
/// A declared session command (becomes an MCP tool).
///
/// Stored under `SessionDef::commands`, keyed by command name.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SessionCommandDef {
    /// Regex pattern the command must match.
    pub pattern: String,
    /// Human-readable description (required).
    pub description: String,
    /// Risk tier label; defaults to "low" when omitted.
    #[serde(default = "default_risk_tier")]
    pub risk_tier: String,
    /// Human-approval flag; defaults to `false` when omitted.
    #[serde(default)]
    pub human_approval: bool,
    /// If true, extract target from command for scope checking.
    /// Defaults to `false`.
    #[serde(default)]
    pub extract_target: bool,
    /// Optional command-specific args, keyed by argument name; empty when omitted.
    #[serde(default)]
    pub args: HashMap<String, ArgDef>,
}
233
234// ---- Browser Mode Types ----
235
/// Browser mode configuration for headless or live browser sessions.
///
/// Deserialized from the manifest's optional `browser` table
/// (see `Manifest::browser`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BrowserDef {
    /// Browser engine name; defaults to "cdp".
    #[serde(default = "default_browser_engine")]
    pub engine: String,
    /// Headless flag; defaults to `true`.
    #[serde(default = "default_true")]
    pub headless: bool,
    /// "launch" (spawn headless) or "live" (attach to running Chrome).
    /// Defaults to "launch".
    #[serde(default = "default_connect")]
    pub connect: String,
    /// "accessibility_tree" | "html" | "text" — default content extraction mode.
    /// Defaults to "accessibility_tree".
    #[serde(default = "default_extract_mode")]
    pub extract_mode: String,
    /// Startup timeout in seconds; defaults to 30.
    #[serde(default = "default_timeout")]
    pub startup_timeout_seconds: u64,
    /// Overall session timeout in seconds; defaults to 1800.
    #[serde(default = "default_session_timeout")]
    pub session_timeout_seconds: u64,
    /// Idle timeout in seconds; defaults to 300.
    #[serde(default = "default_session_idle")]
    pub idle_timeout_seconds: u64,
    /// Maximum number of interactions; defaults to 100.
    #[serde(default = "default_max_interactions")]
    pub max_interactions: u32,
    /// URL scope enforcement.
    pub scope: Option<BrowserScopeDef>,
    /// Allowed browser commands, keyed by command name; empty when omitted.
    #[serde(default)]
    pub commands: HashMap<String, BrowserCommandDef>,
    /// State inference configuration.
    pub state: Option<BrowserStateDef>,
}
265
/// Serde default for `BrowserDef::engine`: `"cdp"`.
fn default_browser_engine() -> String {
    String::from("cdp")
}
/// Serde default for `BrowserDef::connect`: `"launch"`.
fn default_connect() -> String {
    String::from("launch")
}
/// Serde default for `BrowserDef::extract_mode`: `"accessibility_tree"`.
fn default_extract_mode() -> String {
    String::from("accessibility_tree")
}
275
/// Browser URL scope enforcement.
///
/// Stored under `BrowserDef::scope`. Every key is optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BrowserScopeDef {
    /// Domains listed as allowed; empty when omitted.
    /// NOTE(review): matching rules (exact vs. subdomain/wildcard) are not defined in this file.
    #[serde(default)]
    pub allowed_domains: Vec<String>,
    /// Domains listed as blocked; empty when omitted.
    /// NOTE(review): precedence against `allowed_domains` is not defined in this file.
    #[serde(default)]
    pub blocked_domains: Vec<String>,
    /// External-navigation flag; defaults to `false`.
    #[serde(default)]
    pub allow_external: bool,
}
286
/// A declared browser command.
///
/// Stored under `BrowserDef::commands`, keyed by command name.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BrowserCommandDef {
    /// Human-readable description (required).
    pub description: String,
    /// Risk tier label; defaults to "low" when omitted.
    #[serde(default = "default_risk_tier")]
    pub risk_tier: String,
    /// Human-approval flag; defaults to `false` when omitted.
    #[serde(default)]
    pub human_approval: bool,
    /// Command-specific args, keyed by argument name; empty when omitted.
    #[serde(default)]
    pub args: HashMap<String, ArgDef>,
}
299
/// Browser state inference fields.
///
/// Stored under `BrowserDef::state`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BrowserStateDef {
    /// Names of the state fields; empty when omitted.
    /// NOTE(review): the recognised field names are not defined in this file.
    #[serde(default)]
    pub fields: Vec<String>,
}
306
307/// Load a single manifest from a `.clad.toml` file.
308pub fn load_manifest(path: &Path) -> Result<Manifest, String> {
309    let content = std::fs::read_to_string(path)
310        .map_err(|e| format!("Failed to read {}: {}", path.display(), e))?;
311    toml::from_str(&content).map_err(|e| format!("Failed to parse {}: {}", path.display(), e))
312}
313
314/// Load all manifests from a directory.
315pub fn load_manifests_from_dir(dir: &Path) -> Vec<(String, Manifest)> {
316    let mut manifests = Vec::new();
317    if !dir.exists() || !dir.is_dir() {
318        return manifests;
319    }
320    if let Ok(entries) = std::fs::read_dir(dir) {
321        for entry in entries.flatten() {
322            let path = entry.path();
323            if path.extension().map(|e| e == "toml").unwrap_or(false)
324                && path
325                    .file_name()
326                    .map(|n| n.to_string_lossy().ends_with(".clad.toml"))
327                    .unwrap_or(false)
328            {
329                match load_manifest(&path) {
330                    Ok(manifest) => {
331                        let name = manifest.tool.name.clone();
332                        manifests.push((name, manifest));
333                    }
334                    Err(e) => {
335                        eprintln!("  ⚠ Failed to load {}: {}", path.display(), e);
336                    }
337                }
338            }
339        }
340    }
341    manifests
342}
343
344/// Load custom type definitions from `toolclad.toml` at the project root.
345///
346/// The file uses `[types.*]` sections where each entry has a `base` field
347/// (mapped to `type_name`) and other `ArgDef` fields. For example:
348///
349/// ```toml
350/// [types.service_protocol]
351/// base = "enum"
352/// allowed = ["ssh", "ftp", "http"]
353/// ```
354pub fn load_custom_types(project_dir: &Path) -> HashMap<String, ArgDef> {
355    let path = project_dir.join("toolclad.toml");
356    if !path.exists() {
357        return HashMap::new();
358    }
359    let content = match std::fs::read_to_string(&path) {
360        Ok(c) => c,
361        Err(e) => {
362            eprintln!("  Warning: failed to read {}: {}", path.display(), e);
363            return HashMap::new();
364        }
365    };
366    let table: toml::Value = match toml::from_str(&content) {
367        Ok(v) => v,
368        Err(e) => {
369            eprintln!("  Warning: failed to parse {}: {}", path.display(), e);
370            return HashMap::new();
371        }
372    };
373    let types_table = match table.get("types").and_then(|t| t.as_table()) {
374        Some(t) => t,
375        None => return HashMap::new(),
376    };
377    let mut result = HashMap::new();
378    for (name, value) in types_table {
379        let tbl = match value.as_table() {
380            Some(t) => t,
381            None => continue,
382        };
383        let base = match tbl.get("base").and_then(|b| b.as_str()) {
384            Some(b) => b.to_string(),
385            None => {
386                eprintln!(
387                    "  Warning: custom type '{}' missing 'base' field, skipping",
388                    name
389                );
390                continue;
391            }
392        };
393        // Build an ArgDef from the table, using `base` as the type_name
394        let allowed = tbl.get("allowed").and_then(|a| {
395            a.as_array().map(|arr| {
396                arr.iter()
397                    .filter_map(|v| v.as_str().map(String::from))
398                    .collect()
399            })
400        });
401        let pattern = tbl
402            .get("pattern")
403            .and_then(|p| p.as_str())
404            .map(String::from);
405        let min = tbl.get("min").and_then(|v| v.as_integer());
406        let max = tbl.get("max").and_then(|v| v.as_integer());
407        let clamp = tbl.get("clamp").and_then(|v| v.as_bool()).unwrap_or(false);
408        let schemes = tbl.get("schemes").and_then(|s| {
409            s.as_array().map(|arr| {
410                arr.iter()
411                    .filter_map(|v| v.as_str().map(String::from))
412                    .collect()
413            })
414        });
415        let scope_check = tbl
416            .get("scope_check")
417            .and_then(|v| v.as_bool())
418            .unwrap_or(false);
419        let description = tbl
420            .get("description")
421            .and_then(|d| d.as_str())
422            .unwrap_or("")
423            .to_string();
424
425        result.insert(
426            name.clone(),
427            ArgDef {
428                position: 0,
429                required: false,
430                type_name: base,
431                description,
432                allowed,
433                default: None,
434                pattern,
435                sanitize: None,
436                min,
437                max,
438                clamp,
439                schemes,
440                scope_check,
441            },
442        );
443    }
444    result
445}
446
#[cfg(test)]
mod tests {
    use super::*;

    /// Deserialize an inline TOML manifest, panicking on any parse error.
    fn parse(src: &str) -> Manifest {
        toml::from_str(src).unwrap()
    }

    /// A minimal oneshot manifest parses, picks up the default mode, and has
    /// no MCP or HTTP backend.
    #[test]
    fn test_parse_minimal_manifest() {
        let manifest = parse(
            r#"
[tool]
name = "test_tool"
version = "1.0.0"
binary = "echo"
description = "A test tool"

[args.message]
position = 1
required = true
type = "string"
description = "Message to echo"

[command]
template = "echo {message}"

[output]
format = "text"

[output.schema]
type = "object"
"#,
        );

        let tool = &manifest.tool;
        assert_eq!(tool.name, "test_tool");
        assert_eq!(tool.binary, "echo");
        assert_eq!(tool.mode, "oneshot");

        let message = manifest.args.get("message");
        assert!(message.is_some());
        assert_eq!(message.unwrap().type_name, "string");

        assert_eq!(manifest.command.template.as_deref(), Some("echo {message}"));
        assert!(manifest.mcp.is_none());
        assert!(manifest.http.is_none());
    }

    /// `[command.mappings.<arg>]` tables land in `CommandDef::mappings`.
    #[test]
    fn test_parse_manifest_with_mappings() {
        let manifest = parse(
            r#"
[tool]
name = "nmap"
version = "1.0.0"
binary = "nmap"
description = "Scanner"

[args.target]
position = 1
required = true
type = "scope_target"

[args.scan_type]
position = 2
required = true
type = "enum"
allowed = ["ping", "service"]

[command]
template = "nmap {_scan_flags} {target}"

[command.mappings.scan_type]
ping = "-sn"
service = "-sT -sV"

[output]
format = "text"

[output.schema]
type = "object"
"#,
        );

        let scan_type = &manifest.command.mappings["scan_type"];
        assert_eq!(scan_type["ping"], "-sn");
    }

    /// An `[mcp]` table with a field map is parsed into `McpProxyDef`.
    #[test]
    fn test_parse_mcp_proxy_manifest() {
        let manifest = parse(
            r#"
[tool]
name = "governed_search"
version = "1.0.0"
description = "Search via governed MCP proxy"
mode = "oneshot"

[tool.cedar]
resource = "Tool::Search"
action = "execute_search"

[args.query]
position = 1
required = true
type = "string"
description = "Search query"

[args.max_results]
position = 2
required = false
type = "integer"
description = "Maximum results to return"
default = 10

[mcp]
server = "brave-search"
tool = "brave_web_search"

[mcp.field_map]
query = "q"
max_results = "count"

[output]
format = "json"

[output.schema]
type = "object"
"#,
        );

        assert_eq!(manifest.tool.name, "governed_search");

        let proxy = manifest.mcp.as_ref().unwrap();
        assert_eq!(proxy.server, "brave-search");
        assert_eq!(proxy.tool, "brave_web_search");
        assert_eq!(proxy.field_map["query"], "q");
        assert_eq!(proxy.field_map["max_results"], "count");
    }

    /// Omitting `[mcp.field_map]` leaves the field map empty.
    #[test]
    fn test_parse_mcp_proxy_no_field_map() {
        let manifest = parse(
            r#"
[tool]
name = "passthrough_tool"
version = "1.0.0"
description = "Direct passthrough to MCP tool"

[args.input]
position = 1
required = true
type = "string"
description = "Input value"

[mcp]
server = "my-server"
tool = "upstream_tool"

[output]
format = "json"

[output.schema]
type = "object"
"#,
        );

        let proxy = manifest.mcp.as_ref().unwrap();
        assert_eq!(proxy.server, "my-server");
        assert_eq!(proxy.tool, "upstream_tool");
        assert!(proxy.field_map.is_empty());
    }
}
605}