Skip to main content

bashkit/scripted_tool/
mod.rs

1//! Scripted tool
2//!
3//! Compose tool definitions + callbacks into a single [`Tool`] that accepts bash
4//! scripts. Each registered tool becomes a builtin command inside the interpreter,
5//! so an LLM can orchestrate many operations in one call using pipes, variables,
6//! loops, and conditionals.
7//!
8//! This module follows the same contract surface as [`crate::tool`]:
9//!
10//! - [`ScriptedToolBuilder::build`] -> immutable metadata object
11//! - [`ScriptedToolBuilder::build_service`] -> `tower::Service<Value, Value>`
12//! - [`Tool::execution`] -> validated, single-use [`crate::ToolExecution`]
13//! - [`Tool::help`] -> Markdown docs
14//! - [`Tool::system_prompt`] -> terse plain-text instructions
15//!
16//! # Architecture
17//!
18//! ```text
19//! ┌─────────────────────────────────────────┐
20//! │  ScriptedTool  (implements Tool)        │
21//! │                                         │
22//! │  ┌─────────┐ ┌─────────┐ ┌──────────┐  │
23//! │  │get_user │ │get_order│ │inventory │  │
24//! │  │(builtin)│ │(builtin)│ │(builtin) │  │
25//! │  └─────────┘ └─────────┘ └──────────┘  │
26//! │        ↑           ↑           ↑        │
27//! │  bash script: pipes, vars, jq, loops    │
28//! └─────────────────────────────────────────┘
29//! ```
30//!
31//! # Example
32//!
33//! ```rust
34//! use bashkit::{ScriptedTool, Tool, ToolArgs, ToolDef};
35//!
36//! # tokio_test::block_on(async {
37//! let tool = ScriptedTool::builder("api")
38//!     .tool_fn(
39//!         ToolDef::new("greet", "Greet a user")
40//!             .with_schema(serde_json::json!({
41//!                 "type": "object",
42//!                 "properties": { "name": {"type": "string"} }
43//!             })),
44//!         |args: &ToolArgs| {
45//!             let name = args.param_str("name").unwrap_or("world");
46//!             Ok(format!("hello {name}\n"))
47//!         },
48//!     )
49//!     .build();
50//!
51//! let output = tool
52//!     .execution(serde_json::json!({"commands": "greet --name Alice"}))
53//!     .expect("valid args")
54//!     .execute()
55//!     .await
56//!     .expect("execution succeeds");
57//!
58//! assert_eq!(output.result["stdout"], "hello Alice\n");
59//! assert!(tool.help().contains("## Tool Commands"));
60//! # });
61//! ```
62//!
63//! # Shared context across callbacks
64//!
65//! When multiple tool callbacks need shared resources (HTTP clients, auth tokens,
66//! config), use the standard Rust closure-capture pattern with `Arc`:
67//!
68//! ```rust
69//! use bashkit::{ScriptedTool, ToolArgs, ToolDef};
70//! use std::sync::Arc;
71//!
72//! let api_key = Arc::new("sk-secret-key".to_string());
73//! let base_url = Arc::new("https://api.example.com".to_string());
74//!
75//! let k = api_key.clone();
76//! let u = base_url.clone();
77//! let mut builder = ScriptedTool::builder("api");
78//! builder = builder.tool_fn(
79//!     ToolDef::new("get_user", "Fetch user by ID"),
80//!     move |args: &ToolArgs| {
81//!         let _key = &*k;   // shared API key
82//!         let _url = &*u;   // shared base URL
83//!         Ok(format!("{{\"id\":1}}\n"))
84//!     },
85//! );
86//!
87//! let k2 = api_key.clone();
88//! let u2 = base_url.clone();
89//! builder = builder.tool_fn(
90//!     ToolDef::new("list_orders", "List orders"),
91//!     move |_args: &ToolArgs| {
92//!         let _key = &*k2;
93//!         let _url = &*u2;
94//!         Ok(format!("[]\n"))
95//!     },
96//! );
97//! let _tool = builder.build();
98//! ```
99//!
100//! For mutable shared state, use `Arc<Mutex<T>>`:
101//!
102//! ```rust
103//! use bashkit::{ScriptedTool, ToolArgs, ToolDef};
104//! use std::sync::{Arc, Mutex};
105//!
106//! let call_count = Arc::new(Mutex::new(0u64));
107//! let c = call_count.clone();
108//! let tool = ScriptedTool::builder("api")
109//!     .tool_fn(
110//!         ToolDef::new("tracked", "Counted call"),
111//!         move |_args: &ToolArgs| {
112//!             let mut count = c.lock().unwrap();
113//!             *count += 1;
114//!             Ok(format!("call #{count}\n"))
115//!         },
116//!     )
117//!     .build();
118//! ```
119//!
120//! # State across execute() calls
121//!
122//! Each `execute()` creates a fresh Bash interpreter — no state carries over.
123//! This is a security feature (clean sandbox per call). The LLM carries state
124//! between calls via its context window: it sees stdout from each call and can
125//! pass relevant data from one call's output into the next call's script.
126//!
127//! For persistent state across calls via callbacks, use `Arc` in closures —
128//! the same `Arc<ToolCallback>` instances are reused across `execute()` calls.
129
130mod execute;
131mod extension;
132mod toolset;
133
134pub use extension::{ToolDefExtension, ToolDefExtensionBuilder, ToolDefInvocationTrace};
135pub use toolset::{DiscoverTool, DiscoveryMode, ScriptingToolSet, ScriptingToolSetBuilder};
136
137// Re-export foundational types from tool_def (they used to live here).
138pub use crate::tool_def::{
139    AsyncToolCallback, AsyncToolExec, SyncToolExec, ToolArgs, ToolCallback, ToolDef, ToolImpl,
140};
141
142use crate::{ExecutionLimits, Tool, ToolService};
143use serde::{Deserialize, Serialize};
144use std::sync::{Arc, Mutex};
145
/// Sync or async callback for a registered tool.
///
/// Lets a single field hold either flavour of executor: a [`SyncToolExec`]
/// or an [`AsyncToolExec`]. The type is `Clone`, so a callback can be copied
/// along with the tool that owns it.
#[derive(Clone)]
pub enum CallbackKind {
    /// Synchronous callback — blocks until complete.
    Sync(SyncToolExec),
    /// Asynchronous callback — `.await`ed inside the interpreter.
    Async(AsyncToolExec),
}
154
155// ============================================================================
156// Execution trace — inner scripted command/builtin usage
157// ============================================================================
158
/// Kind of inner command invocation recorded during a `ScriptedTool` execute.
///
/// Stored on each [`ScriptedCommandInvocation`] to classify what was called.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ScriptedCommandKind {
    /// A tool command.
    // NOTE(review): presumably one of the tools registered on the builder — confirm in `execute`.
    Tool,
    /// A help command.
    // NOTE(review): presumably the `help <tool>` builtin mentioned by `compact_prompt` — confirm.
    Help,
    /// A discovery command.
    // NOTE(review): presumably tied to `DiscoverTool` in `toolset` — confirm.
    Discover,
}
166
/// One builtin/tool invocation inside a scripted tool execute.
///
/// Entries of this type are collected in [`ScriptedExecutionTrace::invocations`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ScriptedCommandInvocation {
    /// Name of the invoked command.
    pub name: String,
    /// Which category of command this invocation belongs to.
    pub kind: ScriptedCommandKind,
    /// Arguments of the invocation.
    // NOTE(review): presumably the raw argv as seen by the builtin, before flag parsing — confirm.
    pub args: Vec<String>,
    /// Exit code recorded for the invocation.
    pub exit_code: i32,
}
175
/// Inner execution trace captured for the last `ScriptedTool::execute()` call.
///
/// `Default` yields a trace with no invocations.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ScriptedExecutionTrace {
    /// Recorded inner invocations.
    // NOTE(review): presumably in the order they ran — confirm where the trace is filled.
    pub invocations: Vec<ScriptedCommandInvocation>,
}
181
182// ============================================================================
183// RegisteredTool — internal definition + callback pair
184// ============================================================================
185
/// A registered tool: definition + callback.
///
/// Crate-internal pairing kept in the builder's and the tool's `tools` list.
#[derive(Clone)]
pub(crate) struct RegisteredTool {
    /// Definition the tool was registered with.
    pub(crate) def: ToolDef,
    /// Regular executor for the tool.
    pub(crate) callback: CallbackKind,
    /// Optional custom `--dry-run` handler; `None` unless registered through
    /// `ScriptedToolBuilder::tool_with_dry_run`.
    pub(crate) dry_run: Option<CallbackKind>,
}
193
194impl RegisteredTool {
195    /// Create from a [`ToolImpl`], converting its exec/exec_sync to a
196    /// [`CallbackKind`]. Prefers async when available.
197    pub(crate) fn from_tool_impl(tool: ToolImpl) -> Self {
198        let callback = if let Some(async_cb) = tool.exec {
199            CallbackKind::Async(async_cb)
200        } else if let Some(sync_cb) = tool.exec_sync {
201            CallbackKind::Sync(sync_cb)
202        } else {
203            // Schema-only ToolImpl — wrap as a sync callback that always errors.
204            let name = tool.def.name.clone();
205            CallbackKind::Sync(Arc::new(move |_| Err(format!("{name}: no exec defined"))))
206        };
207        Self {
208            def: tool.def,
209            callback,
210            dry_run: None,
211        }
212    }
213}
214
215// ============================================================================
216// ScriptedToolBuilder
217// ============================================================================
218
/// Builder for [`ScriptedTool`].
///
/// Collects the tool name, locale, registered tools, interpreter limits,
/// environment variables and prompt/error options; `build()` copies them into
/// a [`ScriptedTool`].
///
/// ```rust
/// use bashkit::{ScriptedTool, ToolArgs, ToolDef};
///
/// let tool = ScriptedTool::builder("net")
///     .short_description("Network tools")
///     .tool_fn(
///         ToolDef::new("ping", "Ping a host")
///             .with_schema(serde_json::json!({
///                 "type": "object",
///                 "properties": { "host": {"type": "string"} }
///             })),
///         |args: &ToolArgs| {
///             Ok(format!("pong {}\n", args.param_str("host").unwrap_or("?")))
///         },
///     )
///     .build();
/// ```
pub struct ScriptedToolBuilder {
    /// Tool name; also used as the built tool's display name.
    name: String,
    /// Locale tag; defaults to `"en-US"`.
    locale: String,
    /// Optional one-line description; when unset, `build()` falls back to
    /// `"ScriptedTool: <name>"`.
    short_desc: Option<String>,
    /// Tools registered so far, in registration order.
    tools: Vec<RegisteredTool>,
    /// Optional execution limits for the bash interpreter.
    limits: Option<ExecutionLimits>,
    /// Environment variables (key, value) made visible inside scripts.
    env_vars: Vec<(String, String)>,
    /// Whether `system_prompt()` should be emitted in compact form; defaults to `false`.
    compact_prompt: bool,
    /// When true, callback errors are replaced with a generic message to prevent
    /// leaking internal details (file paths, connection strings, stack traces).
    sanitize_errors: bool,
}
250
251impl ScriptedToolBuilder {
252    pub(crate) fn new(name: impl Into<String>) -> Self {
253        Self {
254            name: name.into(),
255            locale: "en-US".to_string(),
256            short_desc: None,
257            tools: Vec::new(),
258            limits: None,
259            env_vars: Vec::new(),
260            compact_prompt: false,
261            sanitize_errors: true,
262        }
263    }
264
265    /// Set locale for descriptions, help, prompts, and user-facing errors.
266    pub fn locale(mut self, locale: &str) -> Self {
267        self.locale = locale.to_string();
268        self
269    }
270
271    /// One-line description for tool listings.
272    pub fn short_description(mut self, desc: impl Into<String>) -> Self {
273        self.short_desc = Some(desc.into());
274        self
275    }
276
277    /// Register a [`ToolImpl`] (definition + exec functions).
278    ///
279    /// This is the preferred registration method. The `ToolImpl` carries its own
280    /// name, schema, and sync/async exec.
281    pub fn tool(mut self, tool: ToolImpl) -> Self {
282        self.tools.push(RegisteredTool::from_tool_impl(tool));
283        self
284    }
285
286    /// Register a tool with its definition and synchronous exec function.
287    ///
288    /// Convenience shorthand — constructs a [`ToolImpl`] internally.
289    /// The exec receives [`ToolArgs`] with `--key value` flags parsed into
290    /// a JSON object, type-coerced per the schema.
291    pub fn tool_fn(
292        mut self,
293        def: ToolDef,
294        exec: impl Fn(&ToolArgs) -> Result<String, String> + Send + Sync + 'static,
295    ) -> Self {
296        self.tools.push(RegisteredTool {
297            def,
298            callback: CallbackKind::Sync(Arc::new(exec)),
299            dry_run: None,
300        });
301        self
302    }
303
304    /// Register a tool with its definition, exec function, and a custom
305    /// `--dry-run` handler. When the tool is invoked with `--dry-run`,
306    /// the custom handler runs instead of the regular callback.
307    pub fn tool_with_dry_run(
308        mut self,
309        def: ToolDef,
310        exec: impl Fn(&ToolArgs) -> Result<String, String> + Send + Sync + 'static,
311        dry_run: impl Fn(&ToolArgs) -> Result<String, String> + Send + Sync + 'static,
312    ) -> Self {
313        self.tools.push(RegisteredTool {
314            def,
315            callback: CallbackKind::Sync(Arc::new(exec)),
316            dry_run: Some(CallbackKind::Sync(Arc::new(dry_run))),
317        });
318        self
319    }
320
321    /// Register a tool with its definition and **async** exec function.
322    ///
323    /// Convenience shorthand — constructs a [`ToolImpl`] internally.
324    /// Same as [`tool_fn()`](Self::tool_fn) but returns a `Future`,
325    /// allowing non-blocking I/O. Takes owned [`ToolArgs`] because the future
326    /// may outlive the borrow.
327    pub fn async_tool_fn<F, Fut>(mut self, def: ToolDef, exec: F) -> Self
328    where
329        F: Fn(ToolArgs) -> Fut + Send + Sync + 'static,
330        Fut: Future<Output = Result<String, String>> + Send + 'static,
331    {
332        let cb: AsyncToolExec = Arc::new(move |args| Box::pin(exec(args)));
333        self.tools.push(RegisteredTool {
334            def,
335            callback: CallbackKind::Async(cb),
336            dry_run: None,
337        });
338        self
339    }
340
341    /// Set execution limits for the bash interpreter.
342    pub fn limits(mut self, limits: ExecutionLimits) -> Self {
343        self.limits = Some(limits);
344        self
345    }
346
347    /// Add an environment variable visible inside scripts.
348    pub fn env(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
349        self.env_vars.push((key.into(), value.into()));
350        self
351    }
352
353    /// Control whether callback error messages are sanitized before appearing in
354    /// tool output. When `true` (the default), internal error details are replaced
355    /// with a generic "callback failed" message to prevent leaking file paths,
356    /// connection strings, or stack traces to LLM agents.
357    // THREAT[TM-INF-030]: Prevent information disclosure through callback errors.
358    pub fn sanitize_errors(mut self, sanitize: bool) -> Self {
359        self.sanitize_errors = sanitize;
360        self
361    }
362
363    /// Emit compact `system_prompt()` that omits full schemas and adds help tip.
364    ///
365    /// When enabled, `system_prompt()` lists only tool names + one-liners and
366    /// instructs the LLM to use `help <tool>` / `help <tool> --json` for details.
367    /// Default: `false` (full schemas in prompt, backward compatible).
368    pub fn compact_prompt(mut self, compact: bool) -> Self {
369        self.compact_prompt = compact;
370        self
371    }
372
373    /// Build the [`ScriptedTool`].
374    pub fn build(&self) -> ScriptedTool {
375        let short_desc = self
376            .short_desc
377            .clone()
378            .unwrap_or_else(|| format!("ScriptedTool: {}", self.name));
379        let tool_names = self
380            .tools
381            .iter()
382            .map(|tool| tool.def.name.as_str())
383            .collect::<Vec<_>>()
384            .join(", ");
385
386        ScriptedTool {
387            name: self.name.clone(),
388            locale: self.locale.clone(),
389            display_name: self.name.clone(),
390            short_desc,
391            description: format!(
392                "{}: {}",
393                super::tool::localized(
394                    self.locale.as_str(),
395                    "Compose tool callbacks through bash scripts",
396                    "Компонує виклики інструментів через bash-скрипти",
397                ),
398                tool_names
399            ),
400            tools: self.tools.clone(),
401            limits: self.limits.clone(),
402            env_vars: self.env_vars.clone(),
403            compact_prompt: self.compact_prompt,
404            sanitize_errors: self.sanitize_errors,
405            last_execution_trace: Arc::new(Mutex::new(None)),
406        }
407    }
408
409    /// Build a `tower::Service<Value, Response = Value, Error = ToolError>`.
410    pub fn build_service(&self) -> ToolService {
411        let tool = self.build();
412        tower::util::BoxCloneService::new(tower::service_fn(move |args| {
413            let tool = tool.clone();
414            async move {
415                let execution = tool.execution(args)?;
416                let output = execution.execute().await?;
417                Ok(output.result)
418            }
419        }))
420    }
421
422    /// Build an OpenAI-compatible tool definition.
423    pub fn build_tool_definition(&self) -> serde_json::Value {
424        let tool = self.build();
425        serde_json::json!({
426            "type": "function",
427            "function": {
428                "name": tool.name(),
429                "description": tool.description(),
430                "parameters": self.build_input_schema(),
431            }
432        })
433    }
434
435    /// Build the input schema without constructing the full tool.
436    pub fn build_input_schema(&self) -> serde_json::Value {
437        crate::tool::tool_request_schema()
438    }
439
440    /// Build the output schema for `ToolOutput::result`.
441    pub fn build_output_schema(&self) -> serde_json::Value {
442        crate::tool::tool_response_schema()
443    }
444}
445
446// ============================================================================
447// ScriptedTool
448// ============================================================================
449
/// A [`Tool`] that orchestrates multiple tools via bash scripts.
///
/// Each registered tool (defined by [`ToolDef`] + callback) becomes a bash builtin.
/// The LLM sends a bash script that can pipe, loop, branch, and compose these
/// builtins together with standard utilities like `jq`, `grep`, `sed`, etc.
///
/// Arguments are passed as `--key value` flags and parsed into typed JSON
/// per the tool's `input_schema`.
///
/// Reusable — `execute()` can be called multiple times. Each call gets a fresh
/// Bash interpreter with the same set of tool builtins.
///
/// Cloning shares the execution-trace slot: `last_execution_trace` sits behind
/// an `Arc`, so clones read and write the same stored trace.
///
/// Create via [`ScriptedTool::builder`].
#[derive(Clone)]
pub struct ScriptedTool {
    /// Tool name, as given to the builder.
    pub(crate) name: String,
    /// Locale tag copied from the builder.
    pub(crate) locale: String,
    /// Display name; the builder sets it to the same value as `name`.
    pub(crate) display_name: String,
    /// One-line description (explicit, or the builder's `"ScriptedTool: <name>"` fallback).
    pub(crate) short_desc: String,
    /// Localized summary followed by the comma-separated registered tool names.
    pub(crate) description: String,
    /// Registered tools (definition + callback), in registration order.
    pub(crate) tools: Vec<RegisteredTool>,
    /// Optional execution limits for the bash interpreter.
    pub(crate) limits: Option<ExecutionLimits>,
    /// Environment variables (key, value) visible inside scripts.
    pub(crate) env_vars: Vec<(String, String)>,
    /// Whether `system_prompt()` is emitted in compact form.
    pub(crate) compact_prompt: bool,
    /// Whether callback error messages are sanitized before reaching tool output.
    pub(crate) sanitize_errors: bool,
    /// Slot holding the trace of the most recent execute call, if any.
    pub(crate) last_execution_trace: Arc<Mutex<Option<ScriptedExecutionTrace>>>,
}
477
478impl ScriptedTool {
479    /// Create a builder with the given tool name.
480    pub fn builder(name: impl Into<String>) -> ScriptedToolBuilder {
481        ScriptedToolBuilder::new(name)
482    }
483
484    /// Return and clear the trace from the most recent execute call.
485    pub fn take_last_execution_trace(&self) -> Option<ScriptedExecutionTrace> {
486        self.last_execution_trace
487            .lock()
488            .expect("scripted execution trace poisoned")
489            .take()
490    }
491
492    pub(crate) fn store_last_execution_trace(&self, trace: ScriptedExecutionTrace) {
493        *self
494            .last_execution_trace
495            .lock()
496            .expect("scripted execution trace poisoned") = Some(trace);
497    }
498}
499
500// ============================================================================
501// Tests
502// ============================================================================
503
504#[cfg(test)]
505mod tests {
506    use super::*;
507    use crate::tool::{Tool, ToolRequest, VERSION};
508
509    fn build_test_tool() -> ScriptedTool {
510        ScriptedTool::builder("test_api")
511            .short_description("Test API")
512            .tool_fn(
513                ToolDef::new("get_user", "Fetch user by id").with_schema(serde_json::json!({
514                    "type": "object",
515                    "properties": {
516                        "id": {"type": "integer"}
517                    }
518                })),
519                |args: &ToolArgs| {
520                    let id = args.param_i64("id").ok_or("missing --id")?;
521                    Ok(format!(
522                        "{{\"id\":{id},\"name\":\"Alice\",\"email\":\"alice@example.com\"}}\n"
523                    ))
524                },
525            )
526            .tool_fn(
527                ToolDef::new("get_orders", "List orders for user").with_schema(serde_json::json!({
528                    "type": "object",
529                    "properties": {
530                        "user_id": {"type": "integer"}
531                    }
532                })),
533                |args: &ToolArgs| {
534                    let uid = args.param_i64("user_id").ok_or("missing --user_id")?;
535                    Ok(format!(
536                        "[{{\"order_id\":1,\"user_id\":{uid},\"total\":29.99}},\
537                         {{\"order_id\":2,\"user_id\":{uid},\"total\":49.50}}]\n"
538                    ))
539                },
540            )
541            .tool_fn(
542                ToolDef::new("fail_tool", "Always fails"),
543                |_args: &ToolArgs| Err("service unavailable".to_string()),
544            )
545            .tool_fn(
546                ToolDef::new("from_stdin", "Read from stdin, uppercase it"),
547                |args: &ToolArgs| match args.stdin.as_deref() {
548                    Some(input) => Ok(input.to_uppercase()),
549                    None => Err("no stdin".to_string()),
550                },
551            )
552            .build()
553    }
554
555    // -- Builder tests --
556
/// The builder's name and explicit short description are exposed by the tool.
#[test]
fn test_builder_name_and_description() {
    let api = build_test_tool();
    let (name, short) = (api.name(), api.short_description());
    assert_eq!(name, "test_api");
    assert_eq!(short, "Test API");
}
563
/// Without an explicit short description the builder derives one from the name.
#[test]
fn test_builder_default_short_description() {
    let builder = ScriptedTool::builder("mytools");
    let builder = builder.tool_fn(ToolDef::new("noop", "No-op"), |_args: &ToolArgs| {
        Ok("ok\n".to_string())
    });
    let built = builder.build();
    assert_eq!(built.short_description(), "ScriptedTool: mytools");
}
573
/// The description mentions every registered tool name.
#[test]
fn test_description_lists_tools() {
    let api = build_test_tool();
    let description = api.description();
    for expected in ["get_user", "get_orders", "fail_tool", "from_stdin"] {
        assert!(description.contains(expected));
    }
}
583
/// Help output has a "Tool Commands" section naming and describing a tool.
#[test]
fn test_help_has_tool_commands_section() {
    let api = build_test_tool();
    let help_text = api.help();
    for expected in ["## Tool Commands", "get_user", "Fetch user by id"] {
        assert!(help_text.contains(expected));
    }
}
592
/// The system prompt starts with the tool name and lists tools and flag syntax.
#[test]
fn test_system_prompt_lists_tools() {
    let api = build_test_tool();
    let prompt = api.system_prompt();
    assert!(prompt.starts_with("test_api:"));
    for expected in ["get_user", "get_orders", "--key value"] {
        assert!(prompt.contains(expected));
    }
}
602
/// With the default (non-compact) prompt, schema properties are rendered as flags.
#[test]
fn test_system_prompt_includes_schema() {
    let schema = serde_json::json!({
        "type": "object",
        "properties": {
            "id": {"type": "integer"}
        },
        "required": ["id"]
    });
    let def = ToolDef::new("get_user", "Fetch user by id").with_schema(schema);
    let built = ScriptedTool::builder("schema_test")
        .tool_fn(def, |_args: &ToolArgs| Ok("ok\n".to_string()))
        .build();

    let prompt = built.system_prompt();
    assert!(
        prompt.contains("--id <integer>"),
        "system prompt should show flags"
    );
}
623
/// Input schema exposes `commands`; output schema exposes `stdout`.
#[test]
fn test_schemas() {
    let api = build_test_tool();

    let input_schema = api.input_schema();
    assert!(input_schema["properties"]["commands"].is_object());

    let output_schema = api.output_schema();
    assert!(output_schema["properties"]["stdout"].is_object());
}
632
/// The builder-level helpers produce a function definition whose parameters
/// match the input schema, plus an output schema with `stdout`.
#[test]
fn test_builder_contract_helpers() {
    let ping_def = ToolDef::new("ping", "Ping");
    let builder = ScriptedTool::builder("test_api")
        .tool_fn(ping_def, |_args: &ToolArgs| Ok("pong\n".to_string()));

    let def_json = builder.build_tool_definition();
    let in_schema = builder.build_input_schema();
    let out_schema = builder.build_output_schema();

    assert_eq!(def_json["type"], "function");
    assert_eq!(def_json["function"]["name"], "test_api");
    assert_eq!(def_json["function"]["parameters"], in_schema);
    assert!(out_schema["properties"]["stdout"].is_object());
}
648
649    #[tokio::test]
650    async fn test_builder_service_executes() {
651        use tower::ServiceExt;
652
653        let service = ScriptedTool::builder("test_api")
654            .tool_fn(ToolDef::new("ping", "Ping"), |_args: &ToolArgs| {
655                Ok("pong\n".to_string())
656            })
657            .build_service();
658
659        let result = service
660            .oneshot(serde_json::json!({"commands": "ping"}))
661            .await
662            .unwrap_or_else(|err| panic!("service should execute: {err}"));
663
664        assert_eq!(result["stdout"], "pong\n");
665        assert_eq!(result["exit_code"], 0);
666    }
667
/// A Ukrainian locale yields the Ukrainian description and is reported back.
#[test]
fn test_locale_localizes_description() {
    let builder = ScriptedTool::builder("ua_api").locale("uk-UA");
    let built = builder
        .tool_fn(ToolDef::new("ping", "Ping"), |_args: &ToolArgs| {
            Ok("pong\n".to_string())
        })
        .build();

    let description = built.description();
    assert!(description.contains("Компонує"));
    assert_eq!(built.locale(), "uk-UA");
}
680
/// The tool reports the crate-wide `VERSION`.
#[test]
fn test_version() {
    let reported = build_test_tool().version().to_string();
    assert_eq!(reported, VERSION);
}
686
687    // -- Execution tests --
688
689    #[tokio::test]
690    async fn test_execute_empty() {
691        let tool = build_test_tool();
692        let resp = tool
693            .execute(ToolRequest {
694                commands: String::new(),
695                timeout_ms: None,
696            })
697            .await;
698        assert_eq!(resp.exit_code, 0);
699        assert!(resp.stdout.is_empty());
700    }
701
702    #[tokio::test]
703    async fn test_execute_single_tool() {
704        let tool = build_test_tool();
705        let resp = tool
706            .execute(ToolRequest {
707                commands: "get_user --id 42".to_string(),
708                timeout_ms: None,
709            })
710            .await;
711        assert_eq!(resp.exit_code, 0);
712        assert!(resp.stdout.contains("\"name\":\"Alice\""));
713        assert!(resp.stdout.contains("\"id\":42"));
714    }
715
716    #[tokio::test]
717    async fn test_execute_key_equals_value() {
718        let tool = build_test_tool();
719        let resp = tool
720            .execute(ToolRequest {
721                commands: "get_user --id=42".to_string(),
722                timeout_ms: None,
723            })
724            .await;
725        assert_eq!(resp.exit_code, 0);
726        assert!(resp.stdout.contains("\"id\":42"));
727    }
728
729    #[tokio::test]
730    async fn test_execute_pipeline_with_jq() {
731        let tool = build_test_tool();
732        let resp = tool
733            .execute(ToolRequest {
734                commands: "get_user --id 42 | jq -r '.name'".to_string(),
735                timeout_ms: None,
736            })
737            .await;
738        assert_eq!(resp.exit_code, 0);
739        assert_eq!(resp.stdout.trim(), "Alice");
740    }
741
742    #[tokio::test]
743    async fn test_execute_multi_step() {
744        let tool = build_test_tool();
745        let script = r#"
746            user=$(get_user --id 1)
747            name=$(echo "$user" | jq -r '.name')
748            orders=$(get_orders --user_id 1)
749            total=$(echo "$orders" | jq '[.[].total] | add')
750            echo "User: $name, Total: $total"
751        "#;
752        let resp = tool
753            .execute(ToolRequest {
754                commands: script.to_string(),
755                timeout_ms: None,
756            })
757            .await;
758        assert_eq!(resp.exit_code, 0);
759        assert_eq!(resp.stdout.trim(), "User: Alice, Total: 79.49");
760    }
761
762    #[tokio::test]
763    async fn test_execute_tool_failure() {
764        let tool = build_test_tool();
765        let resp = tool
766            .execute(ToolRequest {
767                commands: "fail_tool".to_string(),
768                timeout_ms: None,
769            })
770            .await;
771        assert_ne!(resp.exit_code, 0);
772        assert!(resp.stderr.contains("callback failed"));
773    }
774
775    #[tokio::test]
776    async fn test_execute_tool_failure_with_fallback() {
777        let tool = build_test_tool();
778        let resp = tool
779            .execute(ToolRequest {
780                commands: "fail_tool || echo 'fallback'".to_string(),
781                timeout_ms: None,
782            })
783            .await;
784        assert_eq!(resp.exit_code, 0);
785        assert!(resp.stdout.contains("fallback"));
786    }
787
788    #[tokio::test]
789    async fn test_execute_stdin_pipe() {
790        let tool = build_test_tool();
791        let resp = tool
792            .execute(ToolRequest {
793                commands: "echo hello | from_stdin".to_string(),
794                timeout_ms: None,
795            })
796            .await;
797        assert_eq!(resp.exit_code, 0);
798        assert_eq!(resp.stdout.trim(), "HELLO");
799    }
800
801    #[tokio::test]
802    async fn test_execute_loop_over_tools() {
803        let tool = build_test_tool();
804        let script = r#"
805            for uid in 1 2 3; do
806                get_user --id $uid | jq -r '.name'
807            done
808        "#;
809        let resp = tool
810            .execute(ToolRequest {
811                commands: script.to_string(),
812                timeout_ms: None,
813            })
814            .await;
815        assert_eq!(resp.exit_code, 0);
816        assert_eq!(resp.stdout.trim(), "Alice\nAlice\nAlice");
817    }
818
819    #[tokio::test]
820    async fn test_execute_conditional() {
821        let tool = build_test_tool();
822        let script = r#"
823            user=$(get_user --id 5)
824            name=$(echo "$user" | jq -r '.name')
825            if [ "$name" = "Alice" ]; then
826                echo "found alice"
827            else
828                echo "not alice"
829            fi
830        "#;
831        let resp = tool
832            .execute(ToolRequest {
833                commands: script.to_string(),
834                timeout_ms: None,
835            })
836            .await;
837        assert_eq!(resp.exit_code, 0);
838        assert_eq!(resp.stdout.trim(), "found alice");
839    }
840
841    #[tokio::test]
842    async fn test_scripted_tool_rejects_filesystem_command() {
843        let tool = build_test_tool();
844        let resp = tool
845            .execute(ToolRequest {
846                commands: "mkdir -p /tmp/work".to_string(),
847                timeout_ms: None,
848            })
849            .await;
850
851        assert_eq!(resp.exit_code, 127);
852        assert!(resp.stderr.contains("command not found"), "{}", resp.stderr);
853    }
854
855    #[tokio::test]
856    async fn test_scripted_tool_rejects_file_redirection() {
857        let tool = build_test_tool();
858        let resp = tool
859            .execute(ToolRequest {
860                commands: "echo data > /tmp/out".to_string(),
861                timeout_ms: None,
862            })
863            .await;
864
865        assert_ne!(resp.exit_code, 0);
866        assert!(
867            resp.stderr.contains("filesystem redirection disabled"),
868            "{}",
869            resp.stderr
870        );
871    }
872
873    #[tokio::test]
874    async fn test_scripted_tool_rejects_file_redirection_before_callback() {
875        use std::sync::atomic::{AtomicUsize, Ordering};
876
877        let calls = Arc::new(AtomicUsize::new(0));
878        let tool_calls = Arc::clone(&calls);
879        let tool = ScriptedTool::builder("test_api")
880            .tool_fn(
881                ToolDef::new("side_effect", "Count calls"),
882                move |_args: &ToolArgs| {
883                    tool_calls.fetch_add(1, Ordering::SeqCst);
884                    Ok("called\n".to_string())
885                },
886            )
887            .build();
888
889        let resp = tool
890            .execute(ToolRequest {
891                commands: "side_effect > /tmp/out".to_string(),
892                timeout_ms: None,
893            })
894            .await;
895
896        assert_ne!(resp.exit_code, 0);
897        assert!(
898            resp.stderr.contains("filesystem redirection disabled"),
899            "{}",
900            resp.stderr
901        );
902        assert_eq!(calls.load(Ordering::SeqCst), 0);
903    }
904
905    #[tokio::test]
906    async fn test_scripted_tool_allows_dev_null_redirection() {
907        let tool = build_test_tool();
908        let resp = tool
909            .execute(ToolRequest {
910                commands: "echo hidden > /dev/null; echo visible".to_string(),
911                timeout_ms: None,
912            })
913            .await;
914
915        assert_eq!(resp.exit_code, 0);
916        assert_eq!(resp.stdout.trim(), "visible");
917    }
918
919    #[tokio::test]
920    async fn test_scripted_tool_rejects_input_redirection() {
921        let tool = build_test_tool();
922        let resp = tool
923            .execute(ToolRequest {
924                commands: "from_stdin < /tmp/in".to_string(),
925                timeout_ms: None,
926            })
927            .await;
928
929        assert_ne!(resp.exit_code, 0);
930        assert!(
931            resp.stderr.contains("filesystem redirection disabled"),
932            "{}",
933            resp.stderr
934        );
935    }
936
937    #[tokio::test]
938    async fn test_scripted_tool_rejects_path_operands_for_dual_use_tools() {
939        let tool = build_test_tool();
940        let resp = tool
941            .execute(ToolRequest {
942                commands: "grep Alice /tmp/users.json".to_string(),
943                timeout_ms: None,
944            })
945            .await;
946
947        assert_ne!(resp.exit_code, 0);
948        let combined = format!("{}{}", resp.stdout, resp.stderr);
949        assert!(
950            combined.contains("filesystem access disabled"),
951            "{}",
952            combined
953        );
954    }
955
956    #[tokio::test]
957    async fn test_scripted_tool_rejects_script_path_execution() {
958        let tool = build_test_tool();
959        let resp = tool
960            .execute(ToolRequest {
961                commands: "/tmp/script.sh".to_string(),
962                timeout_ms: None,
963            })
964            .await;
965
966        assert_eq!(resp.exit_code, 127);
967        assert!(resp.stderr.contains("command not found"), "{}", resp.stderr);
968    }
969
970    #[tokio::test]
971    async fn test_scripted_tool_rejects_process_substitution() {
972        let tool = build_test_tool();
973        let resp = tool
974            .execute(ToolRequest {
975                commands: "from_stdin < <(echo data)".to_string(),
976                timeout_ms: None,
977            })
978            .await;
979
980        assert_ne!(resp.exit_code, 0);
981        assert!(
982            resp.stderr.contains("process substitution disabled"),
983            "{}",
984            resp.stderr
985        );
986    }
987
988    #[tokio::test]
989    async fn test_execute_with_env() {
990        let tool = ScriptedTool::builder("env_test")
991            .env("API_BASE", "https://api.example.com")
992            .tool_fn(ToolDef::new("noop", "No-op"), |_args: &ToolArgs| {
993                Ok("ok\n".to_string())
994            })
995            .build();
996
997        let resp = tool
998            .execute(ToolRequest {
999                commands: "echo $API_BASE".to_string(),
1000                timeout_ms: None,
1001            })
1002            .await;
1003        assert_eq!(resp.exit_code, 0);
1004        assert_eq!(resp.stdout.trim(), "https://api.example.com");
1005    }
1006
1007    #[tokio::test]
1008    async fn test_execute_with_status_callback() {
1009        use std::sync::{Arc, Mutex};
1010
1011        let tool = build_test_tool();
1012        let phases = Arc::new(Mutex::new(Vec::new()));
1013        let phases_clone = phases.clone();
1014
1015        let resp = tool
1016            .execute_with_status(
1017                ToolRequest {
1018                    commands: "get_user --id 1".to_string(),
1019                    timeout_ms: None,
1020                },
1021                Box::new(move |status| {
1022                    phases_clone
1023                        .lock()
1024                        .expect("lock poisoned")
1025                        .push(status.phase.clone());
1026                }),
1027            )
1028            .await;
1029
1030        assert_eq!(resp.exit_code, 0);
1031        let phases = phases.lock().expect("lock poisoned");
1032        assert!(phases.contains(&"validate".to_string()));
1033        assert!(phases.contains(&"execute".to_string()));
1034        assert!(phases.contains(&"complete".to_string()));
1035    }
1036
1037    #[tokio::test]
1038    async fn test_multiple_execute_calls() {
1039        let tool = build_test_tool();
1040
1041        let resp1 = tool
1042            .execute(ToolRequest {
1043                commands: "get_user --id 1 | jq -r '.name'".to_string(),
1044                timeout_ms: None,
1045            })
1046            .await;
1047        assert_eq!(resp1.stdout.trim(), "Alice");
1048
1049        let resp2 = tool
1050            .execute(ToolRequest {
1051                commands: "get_orders --user_id 1 | jq 'length'".to_string(),
1052                timeout_ms: None,
1053            })
1054            .await;
1055        assert_eq!(resp2.stdout.trim(), "2");
1056    }
1057
1058    #[tokio::test]
1059    async fn test_boolean_flag() {
1060        let tool = ScriptedTool::builder("bool_test")
1061            .tool_fn(
1062                ToolDef::new("search", "Search").with_schema(serde_json::json!({
1063                    "type": "object",
1064                    "properties": {
1065                        "query": {"type": "string"},
1066                        "verbose": {"type": "boolean"}
1067                    }
1068                })),
1069                |args: &ToolArgs| {
1070                    let q = args.param_str("query").unwrap_or("");
1071                    let v = args.param_bool("verbose").unwrap_or(false);
1072                    Ok(format!("q={q} verbose={v}\n"))
1073                },
1074            )
1075            .build();
1076
1077        let resp = tool
1078            .execute(ToolRequest {
1079                commands: "search --verbose --query hello".to_string(),
1080                timeout_ms: None,
1081            })
1082            .await;
1083        assert_eq!(resp.exit_code, 0);
1084        assert_eq!(resp.stdout.trim(), "q=hello verbose=true");
1085    }
1086
1087    #[tokio::test]
1088    async fn test_no_schema_treats_as_strings() {
1089        let tool = ScriptedTool::builder("str_test")
1090            .tool_fn(
1091                ToolDef::new("echo_args", "Echo params as JSON"),
1092                |args: &ToolArgs| Ok(format!("{}\n", args.params)),
1093            )
1094            .build();
1095
1096        let resp = tool
1097            .execute(ToolRequest {
1098                commands: "echo_args --name Alice --count 3".to_string(),
1099                timeout_ms: None,
1100            })
1101            .await;
1102        assert_eq!(resp.exit_code, 0);
1103        let parsed: serde_json::Value =
1104            serde_json::from_str(resp.stdout.trim()).expect("stdout should be valid JSON");
1105        assert_eq!(parsed["name"], "Alice");
1106        assert_eq!(parsed["count"], "3"); // string, not int — no schema
1107    }
1108
1109    // -- Shared context tests (#522) --
1110
1111    #[tokio::test]
1112    async fn test_shared_arc_across_callbacks() {
1113        use std::sync::{Arc, Mutex};
1114
1115        let shared = Arc::new("shared-token".to_string());
1116        let call_log = Arc::new(Mutex::new(Vec::<String>::new()));
1117
1118        let s1 = shared.clone();
1119        let log1 = call_log.clone();
1120        let s2 = shared.clone();
1121        let log2 = call_log.clone();
1122
1123        let tool = ScriptedTool::builder("ctx_test")
1124            .tool_fn(
1125                ToolDef::new("tool_a", "First tool"),
1126                move |_args: &ToolArgs| {
1127                    log1.lock().expect("lock").push(format!("a:{}", *s1));
1128                    Ok("a\n".to_string())
1129                },
1130            )
1131            .tool_fn(
1132                ToolDef::new("tool_b", "Second tool"),
1133                move |_args: &ToolArgs| {
1134                    log2.lock().expect("lock").push(format!("b:{}", *s2));
1135                    Ok("b\n".to_string())
1136                },
1137            )
1138            .build();
1139
1140        let resp = tool
1141            .execute(ToolRequest {
1142                commands: "tool_a && tool_b".to_string(),
1143                timeout_ms: None,
1144            })
1145            .await;
1146        assert_eq!(resp.exit_code, 0);
1147        let log = call_log.lock().expect("lock");
1148        assert_eq!(*log, vec!["a:shared-token", "b:shared-token"]);
1149    }
1150
1151    #[tokio::test]
1152    async fn test_mutable_shared_state_across_callbacks() {
1153        use std::sync::{Arc, Mutex};
1154
1155        let counter = Arc::new(Mutex::new(0u64));
1156        let c = counter.clone();
1157
1158        let tool = ScriptedTool::builder("mut_test")
1159            .tool_fn(
1160                ToolDef::new("increment", "Bump counter"),
1161                move |_args: &ToolArgs| {
1162                    let mut count = c.lock().expect("lock");
1163                    *count += 1;
1164                    Ok(format!("{count}\n"))
1165                },
1166            )
1167            .build();
1168
1169        let resp = tool
1170            .execute(ToolRequest {
1171                commands: "increment; increment; increment".to_string(),
1172                timeout_ms: None,
1173            })
1174            .await;
1175        assert_eq!(resp.exit_code, 0);
1176        assert_eq!(*counter.lock().expect("lock"), 3);
1177    }
1178
1179    // -- Fresh interpreter isolation test (#524) --
1180
1181    #[tokio::test]
1182    async fn test_fresh_interpreter_per_execute() {
1183        let tool = ScriptedTool::builder("isolation_test")
1184            .tool_fn(ToolDef::new("noop", "No-op"), |_args: &ToolArgs| {
1185                Ok("ok\n".to_string())
1186            })
1187            .build();
1188
1189        // Set a variable in call 1
1190        let resp1 = tool
1191            .execute(ToolRequest {
1192                commands: "export MY_VAR=hello; echo $MY_VAR".to_string(),
1193                timeout_ms: None,
1194            })
1195            .await;
1196        assert_eq!(resp1.stdout.trim(), "hello");
1197
1198        // Variable should NOT persist to call 2
1199        let resp2 = tool
1200            .execute(ToolRequest {
1201                commands: "echo \">${MY_VAR}<\"".to_string(),
1202                timeout_ms: None,
1203            })
1204            .await;
1205        assert_eq!(resp2.stdout.trim(), "><");
1206    }
1207
1208    #[tokio::test]
1209    async fn test_arc_callback_persists_across_execute_calls() {
1210        use std::sync::{Arc, Mutex};
1211
1212        let counter = Arc::new(Mutex::new(0u64));
1213        let c = counter.clone();
1214
1215        let tool = ScriptedTool::builder("persist_test")
1216            .tool_fn(
1217                ToolDef::new("count", "Count calls"),
1218                move |_args: &ToolArgs| {
1219                    let mut n = c.lock().expect("lock");
1220                    *n += 1;
1221                    Ok(format!("{n}\n"))
1222                },
1223            )
1224            .build();
1225
1226        // Call 1
1227        let resp1 = tool
1228            .execute(ToolRequest {
1229                commands: "count".to_string(),
1230                timeout_ms: None,
1231            })
1232            .await;
1233        assert_eq!(resp1.stdout.trim(), "1");
1234
1235        // Call 2 — counter persists via Arc
1236        let resp2 = tool
1237            .execute(ToolRequest {
1238                commands: "count".to_string(),
1239                timeout_ms: None,
1240            })
1241            .await;
1242        assert_eq!(resp2.stdout.trim(), "2");
1243    }
1244
1245    #[tokio::test]
1246    async fn test_execution_trace_records_help_discover_and_tool_invocations() {
1247        let tool = build_test_tool();
1248
1249        let resp = tool
1250            .execute(ToolRequest {
1251                commands: "discover --search user\nhelp get_user\nget_user --id 42".to_string(),
1252                timeout_ms: None,
1253            })
1254            .await;
1255        assert_eq!(resp.exit_code, 0);
1256
1257        let trace = tool
1258            .take_last_execution_trace()
1259            .expect("execution trace should be recorded");
1260        assert_eq!(trace.invocations.len(), 3);
1261        assert_eq!(trace.invocations[0].name, "discover");
1262        assert_eq!(trace.invocations[0].kind, ScriptedCommandKind::Discover);
1263        assert_eq!(trace.invocations[1].name, "help");
1264        assert_eq!(trace.invocations[1].kind, ScriptedCommandKind::Help);
1265        assert_eq!(trace.invocations[2].name, "get_user");
1266        assert_eq!(trace.invocations[2].kind, ScriptedCommandKind::Tool);
1267    }
1268
1269    // -- Async callback tests --
1270
1271    #[tokio::test]
1272    async fn test_async_tool_basic() {
1273        let tool = ScriptedTool::builder("async_api")
1274            .async_tool_fn(
1275                ToolDef::new("greet", "Greet async").with_schema(serde_json::json!({
1276                    "type": "object",
1277                    "properties": { "name": {"type": "string"} }
1278                })),
1279                |args: ToolArgs| async move {
1280                    let name = args.param_str("name").unwrap_or("world").to_string();
1281                    Ok(format!("hello {name}\n"))
1282                },
1283            )
1284            .build();
1285
1286        let resp = tool
1287            .execute(ToolRequest {
1288                commands: "greet --name Async".to_string(),
1289                timeout_ms: None,
1290            })
1291            .await;
1292        assert_eq!(resp.exit_code, 0);
1293        assert_eq!(resp.stdout.trim(), "hello Async");
1294    }
1295
1296    #[tokio::test]
1297    async fn test_mixed_sync_async_tools() {
1298        let tool = ScriptedTool::builder("mixed")
1299            .tool_fn(ToolDef::new("sync_ping", "Sync"), |_args: &ToolArgs| {
1300                Ok("sync-pong\n".to_string())
1301            })
1302            .async_tool_fn(
1303                ToolDef::new("async_ping", "Async"),
1304                |_args: ToolArgs| async move { Ok("async-pong\n".to_string()) },
1305            )
1306            .build();
1307
1308        let resp = tool
1309            .execute(ToolRequest {
1310                commands: "sync_ping; async_ping".to_string(),
1311                timeout_ms: None,
1312            })
1313            .await;
1314        assert_eq!(resp.exit_code, 0);
1315        assert!(resp.stdout.contains("sync-pong"));
1316        assert!(resp.stdout.contains("async-pong"));
1317    }
1318
1319    #[tokio::test]
1320    async fn test_async_tool_error_propagates() {
1321        let tool = ScriptedTool::builder("err_api")
1322            .sanitize_errors(false)
1323            .async_tool_fn(
1324                ToolDef::new("fail", "Always fails"),
1325                |_args: ToolArgs| async move { Err("async boom".to_string()) },
1326            )
1327            .build();
1328
1329        let resp = tool
1330            .execute(ToolRequest {
1331                commands: "fail".to_string(),
1332                timeout_ms: None,
1333            })
1334            .await;
1335        assert_ne!(resp.exit_code, 0);
1336        assert!(resp.stderr.contains("async boom"));
1337    }
1338
1339    #[tokio::test]
1340    async fn test_async_tool_stdin_pipe() {
1341        let tool = ScriptedTool::builder("pipe_api")
1342            .async_tool_fn(
1343                ToolDef::new("upper", "Uppercase stdin"),
1344                |args: ToolArgs| async move { Ok(args.stdin.unwrap_or_default().to_uppercase()) },
1345            )
1346            .build();
1347
1348        let resp = tool
1349            .execute(ToolRequest {
1350                commands: "echo hello | upper".to_string(),
1351                timeout_ms: None,
1352            })
1353            .await;
1354        assert_eq!(resp.exit_code, 0);
1355        assert!(resp.stdout.contains("HELLO"));
1356    }
1357
1358    // -- ToolImpl registration --
1359
1360    #[tokio::test]
1361    async fn test_tool_impl_in_scripted_tool() {
1362        let get_user = ToolImpl::new(ToolDef::new("get_user", "Fetch user by ID").with_schema(
1363            serde_json::json!({
1364                "type": "object",
1365                "properties": { "id": {"type": "integer"} },
1366                "required": ["id"]
1367            }),
1368        ))
1369        .with_exec_sync(|args| {
1370            let id = args.param_i64("id").ok_or("missing --id")?;
1371            Ok(format!("{{\"id\":{id},\"name\":\"Alice\"}}\n"))
1372        });
1373
1374        let tool = ScriptedTool::builder("api")
1375            .short_description("Test API")
1376            .tool(get_user)
1377            .build();
1378
1379        assert!(tool.system_prompt().contains("get_user"));
1380        assert!(tool.help().contains("get_user"));
1381
1382        let resp = tool
1383            .execute(ToolRequest {
1384                commands: "get_user --id 42 | jq -r '.name'".to_string(),
1385                timeout_ms: None,
1386            })
1387            .await;
1388        assert_eq!(resp.exit_code, 0);
1389        assert_eq!(resp.stdout.trim(), "Alice");
1390    }
1391
1392    #[tokio::test]
1393    async fn test_tool_impl_async_exec_in_scripted_tool() {
1394        let greet = ToolImpl::new(ToolDef::new("greet", "Greet someone").with_schema(
1395            serde_json::json!({
1396                "type": "object",
1397                "properties": { "name": {"type": "string"} }
1398            }),
1399        ))
1400        .with_exec(|args| async move {
1401            let name = args.param_str("name").unwrap_or("world");
1402            Ok(format!("hello {name}\n"))
1403        });
1404
1405        let tool = ScriptedTool::builder("api").tool(greet).build();
1406
1407        let resp = tool
1408            .execute(ToolRequest {
1409                commands: "greet --name Bob".to_string(),
1410                timeout_ms: None,
1411            })
1412            .await;
1413        assert_eq!(resp.exit_code, 0);
1414        assert_eq!(resp.stdout.trim(), "hello Bob");
1415    }
1416
1417    #[tokio::test]
1418    async fn test_tool_impl_mixed_with_tool_fn() {
1419        let tool_impl = ToolImpl::new(ToolDef::new("impl_cmd", "From ToolImpl"))
1420            .with_exec_sync(|_args| Ok("from_impl\n".to_string()));
1421
1422        let tool = ScriptedTool::builder("mixed")
1423            .tool(tool_impl)
1424            .tool_fn(ToolDef::new("fn_cmd", "From tool_fn"), |_args| {
1425                Ok("from_fn\n".to_string())
1426            })
1427            .build();
1428
1429        let resp = tool
1430            .execute(ToolRequest {
1431                commands: "echo $(impl_cmd) $(fn_cmd)".to_string(),
1432                timeout_ms: None,
1433            })
1434            .await;
1435        assert_eq!(resp.exit_code, 0);
1436        assert!(resp.stdout.contains("from_impl"));
1437        assert!(resp.stdout.contains("from_fn"));
1438    }
1439
1440    #[tokio::test]
1441    async fn test_tool_def_extension_registers_tools_help_and_discover_in_bash() {
1442        let extension = ToolDefExtension::builder()
1443            .tool_fn(
1444                ToolDef::new("get_user", "Fetch user by ID")
1445                    .with_schema(serde_json::json!({
1446                        "type": "object",
1447                        "properties": { "id": {"type": "integer"} }
1448                    }))
1449                    .with_category("users")
1450                    .with_tags(&["read"]),
1451                |args: &ToolArgs| {
1452                    let id = args.param_i64("id").ok_or("missing --id")?;
1453                    Ok(format!("{{\"id\":{id}}}\n"))
1454                },
1455            )
1456            .build();
1457
1458        let mut bash = crate::Bash::builder().extension(extension).build();
1459        let result = bash
1460            .exec("discover --category users\nhelp get_user\nget_user --id 7")
1461            .await
1462            .expect("extension commands should execute");
1463
1464        assert_eq!(result.exit_code, 0);
1465        assert!(result.stdout.contains("get_user"));
1466        assert!(result.stdout.contains("Usage: get_user --id <integer>"));
1467        assert!(result.stdout.contains(r#""id":7"#));
1468    }
1469
1470    #[tokio::test]
1471    async fn test_tool_def_extension_builds_have_isolated_invocation_logs() {
1472        // Two separate `build()` calls must NOT share traces — that is the
1473        // cross-tenant isolation contract.
1474        let builder = ToolDefExtension::builder()
1475            .tool_fn(ToolDef::new("echo_arg", "Echo"), |args: &ToolArgs| {
1476                Ok(format!("{}\n", args.param_str("msg").unwrap_or_default()))
1477            });
1478        let ext_a = builder.build();
1479        let ext_b = builder.build();
1480        let handle_a = ext_a.invocation_trace();
1481        let handle_b = ext_b.invocation_trace();
1482
1483        let mut bash_a = crate::Bash::builder().extension(ext_a).build();
1484        let mut bash_b = crate::Bash::builder().extension(ext_b).build();
1485        bash_a
1486            .exec("echo_arg --msg alpha")
1487            .await
1488            .expect("bash a should execute");
1489        bash_b
1490            .exec("echo_arg --msg beta")
1491            .await
1492            .expect("bash b should execute");
1493
1494        let trace_a = handle_a.take_invocations();
1495        let trace_b = handle_b.take_invocations();
1496        assert_eq!(trace_a.len(), 1);
1497        assert_eq!(
1498            trace_a[0].args,
1499            vec!["--msg".to_string(), "alpha".to_string()]
1500        );
1501        assert_eq!(trace_b.len(), 1);
1502        assert_eq!(
1503            trace_b[0].args,
1504            vec!["--msg".to_string(), "beta".to_string()]
1505        );
1506    }
1507
1508    #[tokio::test]
1509    async fn test_tool_def_extension_clones_have_isolated_invocation_logs() {
1510        // Cloning copies command configuration only; trace sharing requires an
1511        // explicit ToolDefInvocationTrace handle.
1512        let extension = ToolDefExtension::builder()
1513            .tool_fn(ToolDef::new("echo_arg", "Echo"), |args: &ToolArgs| {
1514                Ok(format!("{}\n", args.param_str("msg").unwrap_or_default()))
1515            })
1516            .build();
1517        let clone = extension.clone();
1518        let extension_trace = extension.invocation_trace();
1519        let clone_trace = clone.invocation_trace();
1520
1521        let mut bash_a = crate::Bash::builder().extension(extension).build();
1522        let mut bash_b = crate::Bash::builder().extension(clone).build();
1523        bash_a
1524            .exec("echo_arg --msg gamma")
1525            .await
1526            .expect("bash a should execute");
1527        bash_b
1528            .exec("echo_arg --msg delta")
1529            .await
1530            .expect("bash b should execute");
1531
1532        let trace_a = extension_trace.take_invocations();
1533        let trace_b = clone_trace.take_invocations();
1534        assert_eq!(trace_a.len(), 1);
1535        assert_eq!(
1536            trace_a[0].args,
1537            vec!["--msg".to_string(), "gamma".to_string()]
1538        );
1539        assert_eq!(trace_b.len(), 1);
1540        assert_eq!(
1541            trace_b[0].args,
1542            vec!["--msg".to_string(), "delta".to_string()]
1543        );
1544    }
1545
1546    #[tokio::test]
1547    async fn test_tool_def_extension_invocations_are_bounded_and_truncated() {
1548        let extension = ToolDefExtension::builder()
1549            .tool_fn(ToolDef::new("noop", "No-op"), |_args: &ToolArgs| {
1550                Ok("ok\n".to_string())
1551            })
1552            .build();
1553        let handle = extension.invocation_trace();
1554        let mut bash = crate::Bash::builder().extension(extension).build();
1555
1556        for _ in 0..300 {
1557            let cmd = format!("noop --msg {}", "x".repeat(1500));
1558            bash.exec(&cmd).await.expect("noop should execute");
1559        }
1560        let trace = handle.take_invocations();
1561        assert_eq!(trace.len(), 256, "log must be capped at MAX_LOG_ENTRIES");
1562        assert_eq!(
1563            trace[0].args[1].len(),
1564            1024,
1565            "long argv tokens must be truncated to MAX_LOG_ARG_BYTES"
1566        );
1567    }
1568
1569    #[tokio::test]
1570    async fn test_tool_def_extension_truncation_is_byte_aware_for_utf8() {
1571        // 4-byte UTF-8 codepoint (U+1F600 😀); 400 of them = 1600 bytes,
1572        // well over the 1024 cap. Truncation must (a) cap by bytes,
1573        // not chars, and (b) leave a valid UTF-8 string.
1574        let extension = ToolDefExtension::builder()
1575            .tool_fn(ToolDef::new("noop", "No-op"), |_args: &ToolArgs| {
1576                Ok("ok\n".to_string())
1577            })
1578            .build();
1579        let handle = extension.invocation_trace();
1580        let mut bash = crate::Bash::builder().extension(extension).build();
1581
1582        let big = "\u{1F600}".repeat(400);
1583        let cmd = format!("noop --msg {}", big);
1584        bash.exec(&cmd).await.expect("noop should execute");
1585
1586        let trace = handle.take_invocations();
1587        assert_eq!(trace.len(), 1);
1588        let truncated = &trace[0].args[1];
1589        assert!(truncated.len() <= 1024, "byte length must respect cap");
1590        // Each codepoint is 4 bytes; 1024 / 4 = 256 codepoints fit.
1591        assert_eq!(truncated.chars().count(), 256);
1592    }
1593
1594    // -- Issue #1278: --help flag tests --
1595
1596    #[tokio::test]
1597    async fn test_tool_help_flag_returns_help_text() {
1598        let tool = build_test_tool();
1599        let resp = tool
1600            .execute(ToolRequest {
1601                commands: "get_user --help".to_string(),
1602                timeout_ms: None,
1603            })
1604            .await;
1605        assert_eq!(resp.exit_code, 0);
1606        assert!(
1607            resp.stdout.contains("get_user"),
1608            "help should include tool name"
1609        );
1610        assert!(
1611            resp.stdout.contains("Fetch user by id"),
1612            "help should include description"
1613        );
1614        assert!(
1615            resp.stdout.contains("--id"),
1616            "help should include parameter flags"
1617        );
1618    }
1619
1620    #[tokio::test]
1621    async fn test_tool_help_flag_does_not_invoke_callback() {
1622        let tool = build_test_tool();
1623        // fail_tool always returns an error, but --help should not invoke it
1624        let resp = tool
1625            .execute(ToolRequest {
1626                commands: "fail_tool --help".to_string(),
1627                timeout_ms: None,
1628            })
1629            .await;
1630        assert_eq!(
1631            resp.exit_code, 0,
1632            "--help should succeed even for fail_tool"
1633        );
1634        assert!(
1635            resp.stdout.contains("Always fails"),
1636            "help should include description"
1637        );
1638    }
1639
1640    #[tokio::test]
1641    async fn test_tool_help_flag_same_as_help_builtin() {
1642        let tool = build_test_tool();
1643        let help_output = tool
1644            .execute(ToolRequest {
1645                commands: "help get_user".to_string(),
1646                timeout_ms: None,
1647            })
1648            .await;
1649        let flag_output = tool
1650            .execute(ToolRequest {
1651                commands: "get_user --help".to_string(),
1652                timeout_ms: None,
1653            })
1654            .await;
1655        assert_eq!(
1656            help_output.stdout, flag_output.stdout,
1657            "`--help` should produce same output as `help <tool>`"
1658        );
1659    }
1660
1661    #[tokio::test]
1662    async fn test_tool_help_flag_stripped_from_args() {
1663        let tool = build_test_tool();
1664        // get_user --help --id 42 should not call the callback with --help in args
1665        let resp = tool
1666            .execute(ToolRequest {
1667                commands: "get_user --help --id 42".to_string(),
1668                timeout_ms: None,
1669            })
1670            .await;
1671        assert_eq!(resp.exit_code, 0);
1672        // Output should be help text, not the callback result
1673        assert!(resp.stdout.contains("Fetch user by id"));
1674        assert!(
1675            !resp.stdout.contains("Alice"),
1676            "callback should NOT be invoked"
1677        );
1678    }
1679
1680    // -- Issue #1279: --dry-run flag tests --
1681
1682    #[tokio::test]
1683    async fn test_dry_run_validates_args() {
1684        let tool = build_test_tool();
1685        let resp = tool
1686            .execute(ToolRequest {
1687                commands: "get_user --dry-run --id 42".to_string(),
1688                timeout_ms: None,
1689            })
1690            .await;
1691        assert_eq!(resp.exit_code, 0);
1692        let parsed: serde_json::Value =
1693            serde_json::from_str(resp.stdout.trim()).expect("stdout should be valid JSON");
1694        assert_eq!(parsed["dry_run"], true);
1695        assert_eq!(parsed["valid"], true);
1696        assert_eq!(parsed["params"]["id"], 42);
1697    }
1698
1699    #[tokio::test]
1700    async fn test_dry_run_does_not_invoke_callback() {
1701        let tool = build_test_tool();
1702        // fail_tool always errors, but --dry-run should succeed
1703        let resp = tool
1704            .execute(ToolRequest {
1705                commands: "fail_tool --dry-run".to_string(),
1706                timeout_ms: None,
1707            })
1708            .await;
1709        assert_eq!(
1710            resp.exit_code, 0,
1711            "--dry-run should not invoke the callback"
1712        );
1713    }
1714
1715    #[tokio::test]
1716    async fn test_dry_run_help_precedence() {
1717        let tool = build_test_tool();
1718        let resp = tool
1719            .execute(ToolRequest {
1720                commands: "get_user --help --dry-run".to_string(),
1721                timeout_ms: None,
1722            })
1723            .await;
1724        assert_eq!(resp.exit_code, 0);
1725        // Should return help text, not dry-run JSON
1726        assert!(
1727            resp.stdout.contains("Fetch user by id"),
1728            "should show help text"
1729        );
1730        assert!(
1731            !resp.stdout.contains("dry_run"),
1732            "should NOT show dry-run JSON"
1733        );
1734    }
1735
1736    #[tokio::test]
1737    async fn test_custom_dry_run_handler() {
1738        let tool = ScriptedTool::builder("dr_test")
1739            .tool_with_dry_run(
1740                ToolDef::new("check", "Validate input").with_schema(serde_json::json!({
1741                    "type": "object",
1742                    "properties": { "id": {"type": "integer"} }
1743                })),
1744                |args: &ToolArgs| {
1745                    let id = args.param_i64("id").ok_or("missing --id")?;
1746                    Ok(format!("executed {id}\n"))
1747                },
1748                |args: &ToolArgs| {
1749                    let id = args.param_i64("id").ok_or("missing --id")?;
1750                    Ok(format!("custom-dry-run id={id}\n"))
1751                },
1752            )
1753            .build();
1754
1755        let resp = tool
1756            .execute(ToolRequest {
1757                commands: "check --dry-run --id 7".to_string(),
1758                timeout_ms: None,
1759            })
1760            .await;
1761        assert_eq!(resp.exit_code, 0);
1762        assert_eq!(resp.stdout.trim(), "custom-dry-run id=7");
1763    }
1764
1765    #[tokio::test]
1766    async fn test_custom_dry_run_handler_sanitizes_errors() {
1767        let tool = ScriptedTool::builder("dr_sanitize")
1768            .tool_with_dry_run(
1769                ToolDef::new("check", "Validate input"),
1770                |_args: &ToolArgs| Ok("ok\n".to_string()),
1771                |_args: &ToolArgs| {
1772                    Err("sensitive: /tmp/token.txt postgres://user:pass@localhost/db".to_string())
1773                },
1774            )
1775            .build();
1776
1777        let resp = tool
1778            .execute(ToolRequest {
1779                commands: "check --dry-run".to_string(),
1780                timeout_ms: None,
1781            })
1782            .await;
1783        assert_eq!(resp.exit_code, 1);
1784        assert!(resp.stderr.contains("callback failed"));
1785        assert!(!resp.stderr.contains("sensitive"));
1786        assert!(!resp.stderr.contains("postgres://"));
1787    }
1788
1789    #[tokio::test]
1790    async fn test_help_flag_returns_help() {
1791        let tool = build_test_tool();
1792        let resp = tool
1793            .execute(ToolRequest {
1794                commands: "get_user --help".to_string(),
1795                timeout_ms: None,
1796            })
1797            .await;
1798        assert_eq!(resp.exit_code, 0);
1799        assert!(
1800            resp.stdout.contains("get_user"),
1801            "help should include tool name"
1802        );
1803        assert!(
1804            resp.stdout.contains("Fetch user by id"),
1805            "help should include description"
1806        );
1807        assert!(
1808            resp.stdout.contains("--id"),
1809            "help should include parameter flags"
1810        );
1811    }
1812}