Skip to main content

kaish_tool_api/
clap_schema.rs

//! Bridge from `clap::Command` reflection → `ToolSchema`.
//!
//! Each migrated builtin's `schema()` delegates to [`params_from_clap`] so the
//! clap-derived struct in `execute()` is the single source of truth for the
//! argv surface. Description and examples remain hand-written — clap doesn't
//! own those concepts.
//!
//! See `docs/clap-migration.md` for the full recipe.
9
10use clap::{Arg, ArgAction, Command};
11
12use kaish_types::{ParamSchema, ToolSchema, Value};
13
14/// Build a `ToolSchema` for a builtin from its clap [`Command`] reflection plus
15/// hand-written description and examples.
16///
17/// Bool flags are recognised via [`ArgAction::SetTrue`] / [`ArgAction::SetFalse`].
18/// Aliases include the short flag (as a single-char string) and all visible
19/// long aliases. `--json` is **excluded** — it's the global output flag and
20/// handled by the kernel after `apply_output_format`, not declared as a per-tool
21/// param.
22pub fn schema_from_clap(
23    cmd: &Command,
24    name: &str,
25    description: &str,
26    examples: impl IntoIterator<Item = (&'static str, &'static str)>,
27) -> ToolSchema {
28    let mut schema = ToolSchema::new(name, description);
29    for param in params_from_clap(cmd) {
30        schema = schema.param(param);
31    }
32    for (desc, code) in examples {
33        schema = schema.example(desc, code);
34    }
35    schema
36}
37
38/// Build a recursive [`ToolSchema`] from a composed clap [`Command`] tree.
39///
40/// Like [`schema_from_clap`] for the top level, but also descends into
41/// `cmd.get_subcommands()`, so a subcommand-aware tool (`kj context list …`)
42/// reflects as a tree the kernel can walk with `select_leaf` to bind flags
43/// against the active leaf.
44///
45/// Each child's `name`/`description` come from the clap subcommand itself
46/// (`get_name`/`get_about`) and its command-level aliases from
47/// `get_all_aliases()`. Examples belong to the top level only — clap doesn't
48/// model per-subcommand usage examples. Flat tools (no `get_subcommands()`)
49/// produce a schema with empty `subcommands`, identical to [`schema_from_clap`].
50pub fn schema_tree_from_clap(
51    cmd: &Command,
52    name: &str,
53    description: &str,
54    examples: impl IntoIterator<Item = (&'static str, &'static str)>,
55) -> ToolSchema {
56    let mut schema = ToolSchema::new(name, description);
57    for param in params_from_clap(cmd) {
58        schema = schema.param(param);
59    }
60    for (desc, code) in examples {
61        schema = schema.example(desc, code);
62    }
63    for sub in cmd.get_subcommands() {
64        schema = schema.subcommand(child_schema_from_clap(sub));
65    }
66    schema
67}
68
69/// Reflect a clap subcommand (and its descendants) into a child [`ToolSchema`].
70///
71/// Name and description are taken from the clap command; command-level aliases
72/// from `get_all_aliases()` (visible *and* hidden, so every routable name is
73/// known to `select_leaf`).
74fn child_schema_from_clap(cmd: &Command) -> ToolSchema {
75    let name = cmd.get_name().to_string();
76    let description = cmd
77        .get_about()
78        .map(|s| s.to_string())
79        .unwrap_or_default();
80    let mut schema = ToolSchema::new(name, description);
81    for param in params_from_clap(cmd) {
82        schema = schema.param(param);
83    }
84    let aliases: Vec<String> = cmd.get_all_aliases().map(|s| s.to_string()).collect();
85    if !aliases.is_empty() {
86        schema = schema.with_command_aliases(aliases);
87    }
88    for sub in cmd.get_subcommands() {
89        schema = schema.subcommand(child_schema_from_clap(sub));
90    }
91    schema
92}
93
94/// Reflect each [`Arg`] in a clap [`Command`] into a [`ParamSchema`].
95///
96/// Skips:
97/// - `--help` / `-h` (auto-injected by clap)
98/// - `--version` / `-V` (auto-injected by clap when version is set)
99/// - `--json` (kernel-owned global flag)
100/// - Hidden *flag* args (`#[arg(hide = true)]` without an index) — true
101///   internal helpers.
102///
103/// Hidden *positional* args are kept: many migrated builtins use
104/// `#[arg(hide = true)] paths: Vec<String>` purely so clap accepts the
105/// `--`-terminated positional tail emitted by `ToolArgs::to_argv()`. Those
106/// positionals ARE the tool's public surface (`cat paths…`, `mkdir paths…`),
107/// so they belong in the schema with a `positional: true` marker and ride
108/// the field's doc-comment for the description.
109pub fn params_from_clap(cmd: &Command) -> Vec<ParamSchema> {
110    cmd.get_arguments()
111        .filter(|arg| !is_skipped(arg))
112        .map(arg_to_param)
113        .collect()
114}
115
116fn is_skipped(arg: &Arg) -> bool {
117    let id = arg.get_id().as_str();
118    if matches!(id, "help" | "version" | "json") {
119        return true;
120    }
121    // Keep hidden positionals (real user surface, just marked hidden to keep
122    // clap's --help tidy); drop hidden flag args (internal clap helpers).
123    // Use `is_positional()` — `get_index()` returns None for derived
124    // `Vec<String>` positionals even though they ARE positional.
125    arg.is_hide_set() && !arg.is_positional()
126}
127
128fn arg_to_param(arg: &Arg) -> ParamSchema {
129    let id = arg.get_id().as_str();
130    // Canonical name: prefer the long flag (the user-facing, kebab-cased form).
131    // Every tool that consumes a bound `ToolArgs` reconstructs argv via
132    // `ToolArgs::to_argv()` and re-parses it with clap (kj, cat, tail, …). That
133    // reconstruction emits `--<name>`, so the param name MUST equal the clap
134    // long, or the rebuilt flag is unknown to clap: a snake field id like
135    // `system_prompt` would render as `--system_prompt`, but clap only accepts
136    // the derived long `--system-prompt`. Positionals have no long and keep the
137    // field id. (Previously the id was canonical and the long an alias, which
138    // worked only because no builtin had a field-name ≠ long-name flag.)
139    let name = arg.get_long().unwrap_or(id).to_string();
140    let action = arg.get_action();
141    let is_bool = matches!(action, ArgAction::SetTrue | ArgAction::SetFalse);
142
143    let param_type = if is_bool {
144        "bool"
145    } else if matches!(action, ArgAction::Count) {
146        "int"
147    } else {
148        // clap erases the field type at runtime — we report "string" for all
149        // value-taking flags. The kernel's type check is a hint, not a gate.
150        "string"
151    };
152
153    let description = arg
154        .get_help()
155        .map(|s| s.to_string())
156        .or_else(|| arg.get_long_help().map(|s| s.to_string()))
157        .unwrap_or_default();
158
159    let required = arg.is_required_set();
160
161    let mut aliases: Vec<String> = Vec::new();
162    if let Some(short) = arg.get_short() {
163        aliases.push(short.to_string());
164    }
165    // Keep the snake-case field id reachable as an alias when it differs from
166    // the canonical long, so a consumer (or script) that addresses the arg by
167    // its Rust field name still matches. The long is now the canonical `name`.
168    if id != name {
169        aliases.push(id.to_string());
170    }
171    if let Some(visible) = arg.get_visible_aliases() {
172        for alias in visible {
173            aliases.push(alias.to_string());
174        }
175    }
176
177    let consumes = match arg.get_num_args() {
178        Some(range) => {
179            let lo = range.min_values();
180            if lo == 0 { 1 } else { lo }
181        }
182        None => 1,
183    };
184
185    let default = if is_bool {
186        Some(Value::Bool(false))
187    } else {
188        None
189    };
190
191    // `arg.is_positional()` is the right oracle: clap returns true for any
192    // positional arg, whereas `get_index()` returns None for derived
193    // `Vec<String>` positionals. Positional slot ordering is by appearance
194    // in `schema.params`, not by clap's internal index.
195    let positional = arg.is_positional();
196
197    ParamSchema::new(name, param_type.to_string())
198        .with_required(required)
199        .with_default(default)
200        .with_description(description)
201        .with_aliases(aliases)
202        .consumes(consumes)
203        .with_positional(positional)
204}
205
#[cfg(test)]
mod tests {
    use super::*;
    use clap::{CommandFactory, Parser};

    // ---- fixtures ---------------------------------------------------------
    //
    // `///` comments on the derive structs and their fields are picked up by
    // clap as about/help text, so they are part of the fixtures (some tests
    // assert on them) and must not be reworded casually.

    #[derive(Parser, Debug)]
    #[command(name = "demo", about = "demo tool")]
    struct DemoArgs {
        /// Number output lines.
        #[arg(short = 'n', long = "number")]
        number: bool,

        /// Number of lines.
        #[arg(short = 'l', long = "lines", default_value_t = 10)]
        lines: i64,

        /// Files to read.
        #[arg(hide = true)]
        paths: Vec<String>,
    }

    /// A demo struct with a hidden internal flag (not positional) — those
    /// should still be skipped from the schema.
    #[derive(Parser, Debug)]
    #[command(name = "demo-internal", about = "demo with internal flag")]
    struct DemoInternalArgs {
        #[arg(hide = true, long = "internal-only")]
        internal: bool,

        /// Files to read.
        paths: Vec<String>,
    }

    // The wording below predates long-as-canonical naming: today the field id
    // `_bare` only survives as an alias, never as the schema name (see
    // `id_override_strips_leading_underscore_from_schema_name`).
    /// A field named `_foo` (Rust convention for unused) used to leak its
    /// leading underscore into the schema `id`. The kernel's flag
    /// canonicalization would then route `--foo VALUE` through as
    /// `--_foo=VALUE`, which clap rejects. Callers must override with
    /// `#[arg(id = "foo", ...)]` to keep the schema name clean. This test
    /// pins both pre-fix and post-fix behavior on a small sample so the
    /// convention stays load-bearing.
    #[derive(Parser, Debug)]
    #[command(name = "demo-id-override")]
    struct DemoIdOverrideArgs {
        /// Without an id override, the leading `_` leaks through.
        #[arg(short = 'b', long = "bare")]
        _bare: Option<String>,

        /// With `id = "..."`, the schema name is clean.
        #[arg(id = "clean", short = 'c', long = "clean")]
        _clean: Option<String>,
    }

    // ---- helpers ----------------------------------------------------------

    /// Reflect the params of a derive-based fixture.
    fn reflect<T: CommandFactory>() -> Vec<ParamSchema> {
        params_from_clap(&T::command())
    }

    /// Find a reflected param by its canonical name.
    fn lookup<'a>(params: &'a [ParamSchema], wanted: &str) -> Option<&'a ParamSchema> {
        params.iter().find(|p| p.name == wanted)
    }

    // ---- flat reflection --------------------------------------------------

    #[test]
    fn bool_flag_becomes_bool_param() {
        let params = reflect::<DemoArgs>();
        let number = lookup(&params, "number").expect("number param");
        assert_eq!(number.param_type, "bool");
        assert!(!number.required);
        assert_eq!(number.aliases, vec!["n".to_string()]);
        assert!(number.description.contains("Number output lines"));
    }

    #[test]
    fn value_flag_reports_short_alias_and_string_type() {
        let params = reflect::<DemoArgs>();
        let lines = lookup(&params, "lines").expect("lines param");
        assert_eq!(lines.param_type, "string");
        assert!(!lines.required);
        assert_eq!(lines.aliases, vec!["l".to_string()]);
    }

    #[test]
    fn hidden_positional_is_kept_and_marked_positional() {
        let params = reflect::<DemoArgs>();
        let paths = lookup(&params, "paths").expect("paths param");
        assert!(paths.positional, "hidden positional sink should be exposed as positional");
        assert_eq!(paths.param_type, "string");
        assert!(paths.description.contains("Files to read"));
    }

    #[test]
    fn hidden_flag_is_dropped() {
        let params = reflect::<DemoInternalArgs>();
        assert!(
            lookup(&params, "internal").is_none(),
            "hidden non-positional flag should be skipped: {:?}",
            params.iter().map(|p| &p.name).collect::<Vec<_>>()
        );
        // The visible positional is still reflected.
        let paths = lookup(&params, "paths");
        assert!(matches!(paths, Some(p) if p.positional));
    }

    #[test]
    fn id_override_strips_leading_underscore_from_schema_name() {
        let params = reflect::<DemoIdOverrideArgs>();

        // Canonical name == LONG flag (`bare`), never the snake field id
        // (`_bare`): tools rebuild argv as `--<name>` and re-parse with clap,
        // so the name has to be the clap long. The field id tags along as an
        // alias to stay addressable. (When the id was canonical, the leading
        // `_` leaked into the schema name — the regression guarded here.)
        let bare = lookup(&params, "bare")
            .expect("name should be the long flag `bare`, not the field id `_bare`");
        assert_eq!(bare.aliases, vec!["b".to_string(), "_bare".to_string()]);
        assert!(
            lookup(&params, "_bare").is_none(),
            "the snake field id must not be the canonical name"
        );

        // Explicit `id = "clean"` equal to the long: the name is `clean` and
        // no redundant id alias is added (id == long == name).
        let clean = lookup(&params, "clean").expect("name should be the long flag `clean`");
        assert_eq!(clean.aliases, vec!["c".to_string()]);
    }

    #[test]
    fn flag_params_are_not_marked_positional() {
        let params = reflect::<DemoArgs>();
        for flag in ["number", "lines"] {
            let param = lookup(&params, flag).unwrap();
            assert!(!param.positional);
        }
    }

    #[test]
    fn help_version_json_filtered() {
        let params = reflect::<DemoArgs>();
        let leaked = params
            .iter()
            .any(|p| matches!(p.name.as_str(), "help" | "version" | "json"));
        assert!(!leaked);
    }

    // ---- tree reflection --------------------------------------------------

    /// A composed two-level command tree reflects into a recursive schema:
    /// child names, command aliases, and leaf params all land on the right node.
    #[test]
    fn schema_tree_reflects_subcommands_and_aliases() {
        // Hand-built `kj`-shaped tree: kj → context (alias ctx) → list (alias ls).
        let list_cmd = Command::new("list").about("list contexts").visible_alias("ls");
        let context_cmd = Command::new("context")
            .about("context ops")
            .visible_alias("ctx")
            .arg(Arg::new("type").long("type").short('t').action(ArgAction::Set))
            .subcommand(list_cmd);
        let root_cmd = Command::new("kj").about("kaijutsu").subcommand(context_cmd);

        let tree = schema_tree_from_clap(&root_cmd, "kj", "kaijutsu", []);

        assert_eq!(tree.subcommands.len(), 1, "kj should have one child");
        let context_node = &tree.subcommands[0];
        assert!(context_node.matches_command("context"));
        assert!(context_node.matches_command("ctx"), "command alias should route");

        // `--type`/`-t` belongs to the context node, not to the root.
        let type_param = context_node
            .params
            .iter()
            .find(|p| p.name == "type")
            .expect("type on context");
        assert_eq!(type_param.param_type, "string");
        assert_eq!(type_param.aliases, vec!["t".to_string()]);
        assert!(tree.params.iter().all(|p| p.name != "type"), "leaf flag must not leak to root");

        assert_eq!(context_node.subcommands.len(), 1);
        let list_node = &context_node.subcommands[0];
        assert!(list_node.matches_command("list"));
        assert!(list_node.matches_command("ls"));
    }

    /// A flat command (no subcommands) reflects with empty `subcommands`,
    /// identical to `schema_from_clap`.
    #[test]
    fn schema_tree_of_flat_command_has_no_subcommands() {
        let flat_cmd = DemoArgs::command();
        let tree = schema_tree_from_clap(&flat_cmd, "demo", "demo tool", []);
        assert!(tree.subcommands.is_empty());
        assert!(tree.aliases.is_empty());
        // Param count matches the flat reflection.
        assert_eq!(tree.params.len(), params_from_clap(&flat_cmd).len());
    }
}