agentkit_tool_shell/
lib.rs

1//! Shell execution tool for agentkit agent loops.
2//!
3//! This crate provides [`ShellExecTool`], a tool that spawns subprocesses and
4//! captures their stdout, stderr, exit code, and success status.  It supports
5//! custom working directories, environment variables, per-invocation timeouts,
6//! and cooperative turn cancellation through [`agentkit_tools_core::ToolContext`].
7//!
8//! The easiest way to get started is with the [`registry()`] helper, which
9//! returns a [`ToolRegistry`] pre-loaded with the `shell.exec` tool.
10//!
11//! Pair the tool with [`CommandPolicy`](agentkit_tools_core::CommandPolicy) from
12//! `agentkit-tools-core` when you need fine-grained control over which
13//! executables, working directories, and environment variables are permitted.
14//!
15//! # Example
16//!
17//! ```rust
18//! use agentkit_tool_shell::{registry, ShellExecTool};
19//! use agentkit_tools_core::Tool;
20//!
21//! // Build a registry that contains the shell.exec tool.
22//! let reg = registry();
23//! let specs = reg.specs();
24//! assert!(specs.iter().any(|s| s.name.0 == "shell.exec"));
25//!
26//! // Or construct the tool manually and register it yourself.
27//! let tool = ShellExecTool::default();
28//! assert_eq!(tool.spec().name.0, "shell.exec");
29//! ```
30
31use std::collections::BTreeMap;
32use std::path::PathBuf;
33use std::time::Duration;
34
35use agentkit_core::{MetadataMap, ToolOutput, ToolResultPart};
36use agentkit_tools_core::{
37    PermissionRequest, ShellPermissionRequest, Tool, ToolAnnotations, ToolContext, ToolError,
38    ToolName, ToolRegistry, ToolRequest, ToolResult, ToolSpec,
39};
40use async_trait::async_trait;
41use serde::Deserialize;
42use serde_json::json;
43use tokio::process::Command;
44use tokio::time::timeout;
45
46/// Creates a [`ToolRegistry`] pre-populated with [`ShellExecTool`].
47///
48/// This is the simplest way to add shell execution to an agent.  The returned
49/// registry contains a single tool registered under the name `shell.exec`.
50///
51/// # Example
52///
53/// ```rust
54/// use agentkit_tool_shell::registry;
55///
56/// let reg = registry();
57/// assert_eq!(reg.specs().len(), 1);
58/// assert_eq!(reg.specs()[0].name.0, "shell.exec");
59/// ```
60pub fn registry() -> ToolRegistry {
61    ToolRegistry::new().with(ShellExecTool::default())
62}
63
64/// A tool that executes shell commands as subprocesses.
65///
66/// `ShellExecTool` implements the [`Tool`] trait and is registered under the
67/// name `shell.exec`.  When invoked it spawns the requested executable, waits
68/// for it to finish (respecting an optional timeout and turn cancellation), and
69/// returns a structured JSON object with `stdout`, `stderr`, `success`, and
70/// `exit_code` fields.
71///
72/// Before execution the tool emits a [`ShellPermissionRequest`] so that
73/// permission policies (e.g. [`CommandPolicy`](agentkit_tools_core::CommandPolicy))
74/// can allow, deny, or require approval for the command.
75///
76/// # Input schema
77///
78/// | Field          | Type              | Required | Description                              |
79/// |----------------|-------------------|----------|------------------------------------------|
80/// | `executable`   | `string`          | yes      | Program to run.                          |
81/// | `argv`         | `[string]`        | no       | Arguments passed to the executable.      |
82/// | `cwd`          | `string`          | no       | Working directory for the subprocess.    |
83/// | `env`          | `{string:string}` | no       | Extra environment variables.             |
84/// | `timeout_ms`   | `integer`         | no       | Maximum wall-clock time in milliseconds. |
85///
86/// # Example
87///
88/// ```rust
89/// use agentkit_tool_shell::ShellExecTool;
90/// use agentkit_tools_core::ToolRegistry;
91///
92/// let mut reg = ToolRegistry::new();
93/// reg.register(ShellExecTool::default());
94///
95/// let spec = &reg.specs()[0];
96/// assert_eq!(spec.name.0, "shell.exec");
97/// assert!(spec.annotations.destructive_hint);
98/// ```
99#[derive(Clone, Debug)]
100pub struct ShellExecTool {
101    spec: ToolSpec,
102}
103
104impl Default for ShellExecTool {
105    fn default() -> Self {
106        Self {
107            spec: ToolSpec {
108                name: ToolName::new("shell.exec"),
109                description: "Execute a shell command and capture stdout, stderr, and exit status."
110                    .into(),
111                input_schema: json!({
112                    "type": "object",
113                    "properties": {
114                        "executable": { "type": "string" },
115                        "argv": {
116                            "type": "array",
117                            "items": { "type": "string" },
118                            "default": []
119                        },
120                        "cwd": { "type": "string" },
121                        "env": {
122                            "type": "object",
123                            "additionalProperties": { "type": "string" }
124                        },
125                        "timeout_ms": { "type": "integer", "minimum": 1 }
126                    },
127                    "required": ["executable"],
128                    "additionalProperties": false
129                }),
130                annotations: ToolAnnotations {
131                    destructive_hint: true,
132                    needs_approval_hint: true,
133                    ..ToolAnnotations::default()
134                },
135                metadata: MetadataMap::new(),
136            },
137        }
138    }
139}
140
141#[derive(Debug, Deserialize)]
142struct ShellExecInput {
143    executable: String,
144    #[serde(default)]
145    argv: Vec<String>,
146    cwd: Option<PathBuf>,
147    #[serde(default)]
148    env: BTreeMap<String, String>,
149    timeout_ms: Option<u64>,
150}
151
152#[async_trait]
153impl Tool for ShellExecTool {
154    /// Returns the [`ToolSpec`] describing the `shell.exec` tool.
155    fn spec(&self) -> &ToolSpec {
156        &self.spec
157    }
158
159    /// Extracts a [`ShellPermissionRequest`] from the incoming [`ToolRequest`].
160    ///
161    /// The returned request is evaluated by the active
162    /// [`PermissionChecker`](agentkit_tools_core::PermissionChecker) before
163    /// [`invoke`](Self::invoke) runs, giving policies such as
164    /// [`CommandPolicy`](agentkit_tools_core::CommandPolicy) a chance to allow
165    /// or deny the command.
166    ///
167    /// # Errors
168    ///
169    /// Returns [`ToolError::InvalidInput`] if the request input cannot be
170    /// deserialized into the expected schema.
171    fn proposed_requests(
172        &self,
173        request: &ToolRequest,
174    ) -> Result<Vec<Box<dyn PermissionRequest>>, ToolError> {
175        let input: ShellExecInput = parse_input(request)?;
176        Ok(vec![Box::new(ShellPermissionRequest {
177            executable: input.executable,
178            argv: input.argv,
179            cwd: input.cwd,
180            env_keys: input.env.keys().cloned().collect(),
181            metadata: request.metadata.clone(),
182        })])
183    }
184
185    /// Spawns the requested command and returns its output.
186    ///
187    /// The subprocess is spawned with `kill_on_drop(true)` so it is cleaned up
188    /// if the future is cancelled.  When a `timeout_ms` is specified in the
189    /// input the command is aborted after that duration.  If a turn
190    /// cancellation token is present in the [`ToolContext`] the command is also
191    /// aborted when the turn is cancelled.
192    ///
193    /// On success the returned [`ToolResult`] contains a JSON object:
194    ///
195    /// ```json
196    /// {
197    ///   "stdout": "...",
198    ///   "stderr": "...",
199    ///   "success": true,
200    ///   "exit_code": 0
201    /// }
202    /// ```
203    ///
204    /// # Errors
205    ///
206    /// * [`ToolError::InvalidInput`] -- the request input does not match the schema.
207    /// * [`ToolError::ExecutionFailed`] -- the command could not be spawned or timed out.
208    /// * [`ToolError::Cancelled`] -- the turn was cancelled while the command was running.
209    async fn invoke(
210        &self,
211        request: ToolRequest,
212        ctx: &mut ToolContext<'_>,
213    ) -> Result<ToolResult, ToolError> {
214        let input: ShellExecInput = parse_input(&request)?;
215        let mut command = Command::new(&input.executable);
216        command.args(&input.argv);
217        command.kill_on_drop(true);
218        if let Some(cwd) = &input.cwd {
219            command.current_dir(cwd);
220        }
221        for (key, value) in &input.env {
222            command.env(key, value);
223        }
224
225        let duration_start = std::time::Instant::now();
226        let output_future = command.output();
227        tokio::pin!(output_future);
228
229        let output = if let Some(timeout_ms) = input.timeout_ms {
230            if let Some(cancellation) = ctx.cancellation.clone() {
231                tokio::select! {
232                    result = &mut output_future => result.map_err(|error| {
233                        ToolError::ExecutionFailed(format!("failed to spawn command: {error}"))
234                    })?,
235                    _ = cancellation.cancelled() => return Err(ToolError::Cancelled),
236                    _ = tokio::time::sleep(Duration::from_millis(timeout_ms)) => {
237                        return Err(ToolError::ExecutionFailed(format!("command timed out after {timeout_ms}ms")));
238                    }
239                }
240            } else {
241                timeout(Duration::from_millis(timeout_ms), &mut output_future)
242                    .await
243                    .map_err(|_| {
244                        ToolError::ExecutionFailed(format!(
245                            "command timed out after {timeout_ms}ms"
246                        ))
247                    })?
248                    .map_err(|error| {
249                        ToolError::ExecutionFailed(format!("failed to spawn command: {error}"))
250                    })?
251            }
252        } else if let Some(cancellation) = ctx.cancellation.clone() {
253            tokio::select! {
254                result = &mut output_future => result.map_err(|error| {
255                    ToolError::ExecutionFailed(format!("failed to spawn command: {error}"))
256                })?,
257                _ = cancellation.cancelled() => return Err(ToolError::Cancelled),
258            }
259        } else {
260            output_future.await.map_err(|error| {
261                ToolError::ExecutionFailed(format!("failed to spawn command: {error}"))
262            })?
263        };
264
265        let stdout = String::from_utf8_lossy(&output.stdout).to_string();
266        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
267        let status = output.status.code();
268        let success = output.status.success();
269
270        Ok(ToolResult {
271            result: ToolResultPart {
272                call_id: request.call_id,
273                output: ToolOutput::Structured(json!({
274                    "stdout": stdout,
275                    "stderr": stderr,
276                    "success": success,
277                    "exit_code": status,
278                })),
279                is_error: !success,
280                metadata: MetadataMap::new(),
281            },
282            duration: Some(duration_start.elapsed()),
283            metadata: MetadataMap::new(),
284        })
285    }
286}
287
288fn parse_input(request: &ToolRequest) -> Result<ShellExecInput, ToolError> {
289    serde_json::from_value(request.input.clone())
290        .map_err(|error| ToolError::InvalidInput(format!("invalid tool input: {error}")))
291}
292
#[cfg(test)]
mod tests {
    use agentkit_capabilities::CapabilityContext;
    use agentkit_core::{SessionId, TurnId};
    use agentkit_tools_core::{
        BasicToolExecutor, PermissionChecker, PermissionCode, PermissionDecision, PermissionDenial,
        ToolExecutionOutcome, ToolExecutor,
    };

    use super::*;

    /// Permission checker that approves every request.
    struct AllowAll;

    impl PermissionChecker for AllowAll {
        fn evaluate(
            &self,
            _request: &dyn agentkit_tools_core::PermissionRequest,
        ) -> PermissionDecision {
            PermissionDecision::Allow
        }
    }

    /// Permission checker that rejects every request as a disallowed command.
    struct DenyCommands;

    impl PermissionChecker for DenyCommands {
        fn evaluate(
            &self,
            _request: &dyn agentkit_tools_core::PermissionRequest,
        ) -> PermissionDecision {
            let denial = PermissionDenial {
                code: PermissionCode::CommandNotAllowed,
                message: "commands denied in test".into(),
                metadata: MetadataMap::new(),
            };
            PermissionDecision::Deny(denial)
        }
    }

    /// Builds a `shell.exec` request that runs `script` through `sh -c`.
    fn sh_request(call_id: &'static str, script: &'static str) -> ToolRequest {
        ToolRequest {
            call_id: call_id.into(),
            tool_name: ToolName::new("shell.exec"),
            input: json!({
                "executable": "sh",
                "argv": ["-c", script]
            }),
            session_id: "session-1".into(),
            turn_id: "turn-1".into(),
            metadata: MetadataMap::new(),
        }
    }

    #[tokio::test]
    async fn shell_tool_executes_and_captures_output() {
        let executor = BasicToolExecutor::new(registry());
        let metadata = MetadataMap::new();
        let session_id = SessionId::new("session-1");
        let turn_id = TurnId::new("turn-1");
        let mut ctx = ToolContext {
            capability: CapabilityContext {
                session_id: Some(&session_id),
                turn_id: Some(&turn_id),
                metadata: &metadata,
            },
            permissions: &AllowAll,
            resources: &(),
            cancellation: None,
        };

        let outcome = executor
            .execute(sh_request("call-1", "printf hello"), &mut ctx)
            .await;

        match outcome {
            ToolExecutionOutcome::Completed(completed) => match completed.result.output {
                ToolOutput::Structured(value) => {
                    assert_eq!(value["stdout"], "hello");
                    assert_eq!(value["success"], true);
                }
                other => panic!("unexpected output: {other:?}"),
            },
            other => panic!("unexpected outcome: {other:?}"),
        }
    }

    #[tokio::test]
    async fn shell_tool_respects_permission_denial() {
        let executor = BasicToolExecutor::new(registry());
        let metadata = MetadataMap::new();
        let session_id = SessionId::new("session-1");
        let turn_id = TurnId::new("turn-1");
        let mut ctx = ToolContext {
            capability: CapabilityContext {
                session_id: Some(&session_id),
                turn_id: Some(&turn_id),
                metadata: &metadata,
            },
            permissions: &DenyCommands,
            resources: &(),
            cancellation: None,
        };

        let outcome = executor
            .execute(sh_request("call-2", "printf nope"), &mut ctx)
            .await;

        assert!(matches!(
            outcome,
            ToolExecutionOutcome::Failed(ToolError::PermissionDenied(_))
        ));
    }
}
agentkit_tool_shell/lib.rs

agentkit_tool_shell/
lib.rs