Skip to main content

tokmd_tool_schema/
lib.rs

1//! Tool-schema generation for AI agent tool use.
2//!
3//! This crate introspects a clap `Command` tree and produces schema output in
4//! formats commonly consumed by AI tooling.
5
6use anyhow::Result;
7use clap::{Arg, ArgAction, Command};
8use serde::{Deserialize, Serialize};
9use serde_json::{Value, json};
10use std::collections::BTreeMap;
11
12/// Output format for rendered tool schemas.
13#[derive(clap::ValueEnum, Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
14#[serde(rename_all = "kebab-case")]
15pub enum ToolSchemaFormat {
16    /// OpenAI function calling format.
17    Openai,
18    /// Anthropic tool use format.
19    Anthropic,
20    /// JSON Schema Draft 7 format.
21    #[default]
22    Jsonschema,
23    /// Raw clap structure dump.
24    Clap,
25}
26
/// Version number that [`build_tool_schema`] records in the envelope's
/// `schema_version` field.
pub const TOOL_SCHEMA_VERSION: u32 = 1;
29
30/// Top-level schema output with envelope metadata.
31#[derive(Debug, Clone, Serialize, Deserialize)]
32pub struct ToolSchemaOutput {
33    /// Schema version.
34    pub schema_version: u32,
35
36    /// Tool name.
37    pub name: String,
38
39    /// Tool version.
40    pub version: String,
41
42    /// Tool description.
43    pub description: String,
44
45    /// Available commands/tools.
46    pub tools: Vec<ToolDefinition>,
47}
48
49/// Definition of a single command/tool.
50#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct ToolDefinition {
52    /// Command name.
53    pub name: String,
54
55    /// Command description.
56    pub description: String,
57
58    /// Parameters/arguments.
59    pub parameters: Vec<ParameterSchema>,
60}
61
62/// Schema for a single parameter/argument.
63#[derive(Debug, Clone, Serialize, Deserialize)]
64pub struct ParameterSchema {
65    /// Parameter name.
66    pub name: String,
67
68    /// Parameter description.
69    #[serde(skip_serializing_if = "Option::is_none")]
70    pub description: Option<String>,
71
72    /// Parameter type.
73    #[serde(rename = "type")]
74    pub param_type: String,
75
76    /// Whether the parameter is required.
77    pub required: bool,
78
79    /// Default value if any.
80    #[serde(skip_serializing_if = "Option::is_none")]
81    pub default: Option<String>,
82
83    /// Enum values if applicable.
84    #[serde(skip_serializing_if = "Option::is_none")]
85    pub enum_values: Option<Vec<String>>,
86}
87
88/// Build the tool schema from a clap `Command`.
89pub fn build_tool_schema(cmd: &Command) -> ToolSchemaOutput {
90    let mut tools = Vec::new();
91
92    // Add the root command as a tool (for default lang mode).
93    tools.push(build_tool_definition(cmd, None));
94
95    // Add all subcommands.
96    for subcmd in cmd.get_subcommands() {
97        // Skip generated help subcommand.
98        let name = subcmd.get_name();
99        if name == "help" {
100            continue;
101        }
102        tools.push(build_tool_definition(subcmd, Some(name)));
103    }
104
105    ToolSchemaOutput {
106        schema_version: TOOL_SCHEMA_VERSION,
107        name: cmd.get_name().to_string(),
108        version: cmd.get_version().unwrap_or("unknown").to_string(),
109        description: cmd.get_about().map(|s| s.to_string()).unwrap_or_default(),
110        tools,
111    }
112}
113
114/// Build a tool definition from a command.
115fn build_tool_definition(cmd: &Command, name_override: Option<&str>) -> ToolDefinition {
116    let name = name_override.unwrap_or(cmd.get_name()).to_string();
117    let description = cmd.get_about().map(|s| s.to_string()).unwrap_or_default();
118
119    let mut parameters = Vec::new();
120
121    // Add arguments.
122    for arg in cmd.get_arguments() {
123        // Skip generated args.
124        if arg.get_id() == "help" || arg.get_id() == "version" {
125            continue;
126        }
127        parameters.push(build_parameter_schema(arg));
128    }
129
130    ToolDefinition {
131        name,
132        description,
133        parameters,
134    }
135}
136
137/// Build a parameter schema from a clap `Arg`.
138fn build_parameter_schema(arg: &Arg) -> ParameterSchema {
139    let name = arg.get_id().to_string();
140    let description = arg.get_help().map(|s| s.to_string());
141
142    // Determine type based on action and value hints.
143    let param_type = determine_param_type(arg);
144
145    // Check if required.
146    let required = arg.is_required_set();
147
148    // Get default value.
149    let default = arg
150        .get_default_values()
151        .first()
152        .map(|v| v.to_string_lossy().to_string());
153
154    // Get enum values if applicable.
155    let enum_values = arg
156        .get_possible_values()
157        .iter()
158        .map(|v| v.get_name().to_string())
159        .collect::<Vec<_>>();
160    let enum_values = if enum_values.is_empty() {
161        None
162    } else {
163        Some(enum_values)
164    };
165
166    ParameterSchema {
167        name,
168        description,
169        param_type,
170        required,
171        default,
172        enum_values,
173    }
174}
175
176/// Determine the parameter type from a clap `Arg`.
177fn determine_param_type(arg: &Arg) -> String {
178    match arg.get_action() {
179        ArgAction::SetTrue | ArgAction::SetFalse => "boolean".to_string(),
180        ArgAction::Count => "integer".to_string(),
181        ArgAction::Append => "array".to_string(),
182        _ => "string".to_string(),
183    }
184}
185
186/// Render the schema output in the specified format.
187pub fn render_output(
188    schema: &ToolSchemaOutput,
189    format: ToolSchemaFormat,
190    pretty: bool,
191) -> Result<String> {
192    match format {
193        ToolSchemaFormat::Jsonschema => render_jsonschema(schema, pretty),
194        ToolSchemaFormat::Openai => render_openai(schema, pretty),
195        ToolSchemaFormat::Anthropic => render_anthropic(schema, pretty),
196        ToolSchemaFormat::Clap => render_clap(schema, pretty),
197    }
198}
199
200/// Render as JSON Schema format.
201fn render_jsonschema(schema: &ToolSchemaOutput, pretty: bool) -> Result<String> {
202    let tools_schema: Vec<Value> = schema
203        .tools
204        .iter()
205        .map(|tool| {
206            let properties: BTreeMap<String, Value> = tool
207                .parameters
208                .iter()
209                .map(|p| {
210                    let mut prop = json!({
211                        "type": p.param_type,
212                    });
213
214                    if let Some(desc) = &p.description {
215                        prop["description"] = json!(desc);
216                    }
217                    if let Some(def) = &p.default {
218                        prop["default"] = json!(def);
219                    }
220                    if let Some(enums) = &p.enum_values {
221                        prop["enum"] = json!(enums);
222                    }
223
224                    (p.name.clone(), prop)
225                })
226                .collect();
227
228            let required: Vec<&str> = tool
229                .parameters
230                .iter()
231                .filter(|p| p.required)
232                .map(|p| p.name.as_str())
233                .collect();
234
235            json!({
236                "name": tool.name,
237                "description": tool.description,
238                "parameters": {
239                    "type": "object",
240                    "properties": properties,
241                    "required": required,
242                }
243            })
244        })
245        .collect();
246
247    let output = json!({
248        "$schema": "https://json-schema.org/draft-07/schema#",
249        "schema_version": schema.schema_version,
250        "name": schema.name,
251        "version": schema.version,
252        "description": schema.description,
253        "tools": tools_schema,
254    });
255
256    if pretty {
257        Ok(serde_json::to_string_pretty(&output)?)
258    } else {
259        Ok(serde_json::to_string(&output)?)
260    }
261}
262
263/// Render in OpenAI function calling format.
264fn render_openai(schema: &ToolSchemaOutput, pretty: bool) -> Result<String> {
265    let functions: Vec<Value> = schema
266        .tools
267        .iter()
268        .map(|tool| {
269            let properties: BTreeMap<String, Value> = tool
270                .parameters
271                .iter()
272                .map(|p| {
273                    let mut prop = json!({
274                        "type": p.param_type,
275                    });
276
277                    if let Some(desc) = &p.description {
278                        prop["description"] = json!(desc);
279                    }
280                    if let Some(enums) = &p.enum_values {
281                        prop["enum"] = json!(enums);
282                    }
283
284                    (p.name.clone(), prop)
285                })
286                .collect();
287
288            let required: Vec<&str> = tool
289                .parameters
290                .iter()
291                .filter(|p| p.required)
292                .map(|p| p.name.as_str())
293                .collect();
294
295            json!({
296                "name": tool.name,
297                "description": tool.description,
298                "parameters": {
299                    "type": "object",
300                    "properties": properties,
301                    "required": required,
302                }
303            })
304        })
305        .collect();
306
307    let output = json!({
308        "functions": functions,
309    });
310
311    if pretty {
312        Ok(serde_json::to_string_pretty(&output)?)
313    } else {
314        Ok(serde_json::to_string(&output)?)
315    }
316}
317
318/// Render in Anthropic tool use format.
319fn render_anthropic(schema: &ToolSchemaOutput, pretty: bool) -> Result<String> {
320    let tools: Vec<Value> = schema
321        .tools
322        .iter()
323        .map(|tool| {
324            let properties: BTreeMap<String, Value> = tool
325                .parameters
326                .iter()
327                .map(|p| {
328                    let mut prop = json!({
329                        "type": p.param_type,
330                    });
331
332                    if let Some(desc) = &p.description {
333                        prop["description"] = json!(desc);
334                    }
335                    if let Some(enums) = &p.enum_values {
336                        prop["enum"] = json!(enums);
337                    }
338
339                    (p.name.clone(), prop)
340                })
341                .collect();
342
343            let required: Vec<&str> = tool
344                .parameters
345                .iter()
346                .filter(|p| p.required)
347                .map(|p| p.name.as_str())
348                .collect();
349
350            json!({
351                "name": tool.name,
352                "description": tool.description,
353                "input_schema": {
354                    "type": "object",
355                    "properties": properties,
356                    "required": required,
357                }
358            })
359        })
360        .collect();
361
362    let output = json!({
363        "tools": tools,
364    });
365
366    if pretty {
367        Ok(serde_json::to_string_pretty(&output)?)
368    } else {
369        Ok(serde_json::to_string(&output)?)
370    }
371}
372
373/// Render raw clap structure (for debugging).
374fn render_clap(schema: &ToolSchemaOutput, pretty: bool) -> Result<String> {
375    if pretty {
376        Ok(serde_json::to_string_pretty(schema)?)
377    } else {
378        Ok(serde_json::to_string(schema)?)
379    }
380}
381
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture: a root command with one subcommand that has a boolean flag and
    /// a required, documented value option.
    fn make_test_cmd() -> Command {
        Command::new("test")
            .version("1.0.0")
            .about("Test command")
            .subcommand(
                Command::new("sub")
                    .about("Subcommand")
                    .arg(Arg::new("flag").long("flag").action(ArgAction::SetTrue))
                    .arg(
                        Arg::new("value")
                            .long("value")
                            .required(true)
                            .help("A value"),
                    ),
            )
    }

    /// Render the fixture in `format` and parse the result back into JSON.
    fn render_parsed(format: ToolSchemaFormat, pretty: bool) -> Value {
        let schema = build_tool_schema(&make_test_cmd());
        let output = render_output(&schema, format, pretty).expect("rendering should succeed");
        serde_json::from_str(&output).expect("rendered output should be valid JSON")
    }

    /// Locate the entry called `name` inside a JSON array of tools.
    fn find_tool<'a>(tools: &'a Value, name: &str) -> &'a Value {
        tools
            .as_array()
            .expect("tool list should be an array")
            .iter()
            .find(|tool| tool["name"] == name)
            .expect("tool should exist")
    }

    #[test]
    fn build_schema_includes_subcommands() {
        let cmd = make_test_cmd();
        let schema = build_tool_schema(&cmd);

        assert_eq!(schema.schema_version, TOOL_SCHEMA_VERSION);
        assert_eq!(schema.name, "test");
        assert_eq!(schema.version, "1.0.0");
        assert_eq!(schema.description, "Test command");

        // Root command first, then the subcommand.
        assert_eq!(schema.tools.len(), 2);
        assert_eq!(schema.tools[0].name, "test");

        let sub = schema
            .tools
            .iter()
            .find(|tool| tool.name == "sub")
            .expect("subcommand should exist");
        assert_eq!(sub.description, "Subcommand");
        assert_eq!(sub.parameters.len(), 2);
    }

    #[test]
    fn build_schema_captures_parameter_details() {
        let schema = build_tool_schema(&make_test_cmd());
        let sub = schema
            .tools
            .iter()
            .find(|tool| tool.name == "sub")
            .expect("subcommand should exist");

        let flag = sub
            .parameters
            .iter()
            .find(|p| p.name == "flag")
            .expect("flag parameter should exist");
        assert_eq!(flag.param_type, "boolean");
        assert!(!flag.required);
        assert!(flag.enum_values.is_none());

        let value = sub
            .parameters
            .iter()
            .find(|p| p.name == "value")
            .expect("value parameter should exist");
        assert_eq!(value.param_type, "string");
        assert!(value.required);
        assert_eq!(value.description.as_deref(), Some("A value"));
    }

    #[test]
    fn render_openai_has_functions_key() {
        let parsed = render_parsed(ToolSchemaFormat::Openai, false);

        assert!(parsed.get("functions").is_some());
        let sub = find_tool(&parsed["functions"], "sub");
        assert_eq!(sub["parameters"]["type"], "object");
        assert_eq!(sub["parameters"]["required"], json!(["value"]));
    }

    #[test]
    fn render_anthropic_has_input_schema() {
        let parsed = render_parsed(ToolSchemaFormat::Anthropic, false);

        assert!(parsed.get("tools").is_some());
        let tools = parsed["tools"].as_array().unwrap();
        assert!(tools.iter().any(|tool| tool.get("input_schema").is_some()));

        let sub = find_tool(&parsed["tools"], "sub");
        assert_eq!(sub["input_schema"]["required"], json!(["value"]));
    }

    #[test]
    fn render_jsonschema_describes_parameters() {
        let parsed = render_parsed(ToolSchemaFormat::Jsonschema, false);

        assert!(parsed.get("$schema").is_some());
        assert_eq!(parsed["schema_version"], json!(TOOL_SCHEMA_VERSION));
        assert_eq!(parsed["name"], "test");

        let sub = find_tool(&parsed["tools"], "sub");
        let params = &sub["parameters"];
        assert_eq!(params["required"], json!(["value"]));
        assert_eq!(params["properties"]["flag"]["type"], "boolean");
        assert_eq!(params["properties"]["value"]["type"], "string");
        assert_eq!(params["properties"]["value"]["description"], "A value");
    }

    #[test]
    fn render_clap_round_trips() {
        let schema = build_tool_schema(&make_test_cmd());
        let output = render_output(&schema, ToolSchemaFormat::Clap, false).unwrap();

        let decoded: ToolSchemaOutput = serde_json::from_str(&output).unwrap();
        assert_eq!(decoded.schema_version, schema.schema_version);
        assert_eq!(decoded.name, schema.name);
        assert_eq!(decoded.version, schema.version);
        assert_eq!(decoded.tools.len(), schema.tools.len());
    }

    #[test]
    fn pretty_and_compact_output_agree() {
        let formats = [
            ToolSchemaFormat::Openai,
            ToolSchemaFormat::Anthropic,
            ToolSchemaFormat::Jsonschema,
            ToolSchemaFormat::Clap,
        ];

        for format in formats {
            assert_eq!(render_parsed(format, true), render_parsed(format, false));
        }
    }
}
437        assert!(parsed.get("tools").is_some());
438        let tools = parsed["tools"].as_array().unwrap();
439        assert!(tools.iter().any(|tool| tool.get("input_schema").is_some()));
440    }
441}