Skip to main content

harness_tools_shell/
lib.rs

1//! Risk-classified shell tools.
2//!
3//! Two flavours:
4//! - `ShellRead` — restricted to a readable allowlist (`cargo check`, `git status`, …)
5//! - `ShellExec` — full subprocess. `Destructive` risk; surface explicitly.
6//!
7//! Both run via `world.runner` so they're trivially mockable in tests.
8
9use async_trait::async_trait;
10use harness_core::{Tool, ToolError, ToolResult, ToolRisk, ToolSchema, World};
11use once_cell::sync::Lazy;
12use serde::Deserialize;
13use serde_json::json;
14
15#[derive(Deserialize)]
16struct ShellArgs {
17    program: String,
18    #[serde(default)]
19    args: Vec<String>,
20    #[serde(default)]
21    cwd: Option<String>,
22}
23
24// ---------- ShellRead ----------
25
/// Tool that runs a program only after `check_safe_args` accepts the
/// program/argument combination. Reports `ToolRisk::ReadOnly`.
#[derive(Debug, Clone, Copy, Default)]
pub struct ShellRead;
27
/// Per-program safe-argument matchers. `ShellRead` will refuse any program
/// not listed here AND any args that fail the matcher.
///
/// Returns `Ok(())` if `args` are safe for `program`, else an error message.
///
/// The matchers fail closed: where a program has a write or "run another
/// program" mode, only argument shapes known to be read-only are accepted, so
/// an unusual but harmless invocation may be rejected and must go through
/// `shell_exec` instead.
///
/// NOTE(review): `cargo build/test/bench`, `cargo fmt`, `go fmt` and `make -n`
/// stay allowed because the schema description advertises them, although they
/// can run project code or rewrite files. `date -s` and `hostname <name>` are
/// also still accepted (they need elevated privileges) — confirm this is
/// acceptable for the deployment.
fn check_safe_args(program: &str, args: &[String]) -> Result<(), String> {
    let sub = args.first().map(String::as_str);
    let rest: &[String] = args.get(1..).unwrap_or(&[]);
    match program {
        "cargo" => match sub {
            // `--fix` makes clippy rewrite source files.
            Some("clippy") if rest.iter().any(|a| a == "--fix") => {
                Err("`cargo clippy --fix` rewrites sources; use shell_exec".into())
            }
            Some(
                "check" | "test" | "build" | "fmt" | "clippy" | "doc" | "tree" | "metadata"
                | "search" | "audit" | "deny" | "outdated" | "bench" | "nextest" | "vendor",
            ) => Ok(()),
            Some(
                s @ ("install" | "uninstall" | "publish" | "yank" | "owner" | "login" | "logout"
                | "package"),
            ) => Err(format!("`cargo {s}` is not read-only")),
            Some(s) => Err(format!(
                "`cargo {s}` not in shell_read subcommand allowlist (use shell_exec for writes)"
            )),
            None => Err("cargo needs a subcommand".into()),
        },
        "git" => match sub {
            Some("config") => check_git_config(rest),
            Some(s @ ("branch" | "tag")) => check_git_ref_listing(s, rest),
            Some("remote") => check_git_remote(rest),
            Some(
                s @ ("status" | "log" | "show" | "diff" | "blame" | "rev-parse" | "ls-files"
                | "ls-tree" | "describe" | "shortlog"),
            ) => {
                // The diff machinery can redirect its output into an arbitrary file.
                if rest
                    .iter()
                    .any(|a| a == "--output" || a.starts_with("--output="))
                {
                    Err(format!("`git {s} --output` writes a file; use shell_exec"))
                } else {
                    Ok(())
                }
            }
            Some(s) => Err(format!("`git {s}` is not in the read-only subcommand list")),
            None => Err("git needs a subcommand".into()),
        },
        // Language tool-chain inspectors: `--version`, `info`, `list`, etc.
        // are universally safe; install/publish/run are NOT (use shell_exec).
        "npm" | "pnpm" | "yarn" | "bun" => match sub {
            // Only the query forms of `config` are reads.
            Some("config") => match first_positional(rest) {
                None | Some("get" | "list" | "ls") => Ok(()),
                Some(op) => Err(format!(
                    "`{program} config {op}` mutates state; use shell_exec"
                )),
            },
            // `audit fix` installs packages.
            Some("audit") if rest.iter().any(|a| a == "fix" || a == "--fix") => Err(format!(
                "`{program} audit fix` mutates state; use shell_exec"
            )),
            Some(
                "ls" | "list" | "view" | "info" | "outdated" | "audit" | "doctor" | "search"
                | "ping" | "whoami" | "--version" | "-v",
            ) => Ok(()),
            Some(
                s @ ("install" | "i" | "add" | "uninstall" | "remove" | "rm" | "publish" | "pack"
                | "run" | "exec" | "dlx" | "create" | "link" | "unlink" | "version"
                | "deprecate" | "owner" | "login" | "logout" | "init"),
            ) => Err(format!("`{program} {s}` mutates state; use shell_exec")),
            Some(s) => Err(format!("`{program} {s}` not in shell_read allowlist")),
            None => Err(format!("`{program}` needs a subcommand")),
        },
        "python" | "python3" | "uv" | "pip" | "pip3" => match sub {
            // Read-only enquiry forms only.
            Some("--version" | "-V") => Ok(()),
            // `pip list / show / check / config get` are read; install/uninstall/wheel are NOT.
            Some(
                "list" | "show" | "check" | "freeze" | "config" | "search" | "index" | "--help",
            ) if program.starts_with("pip") => {
                let edits_config = sub == Some("config")
                    && rest
                        .iter()
                        .any(|a| matches!(a.as_str(), "set" | "unset" | "edit"));
                if edits_config {
                    Err("`pip config set/unset/edit` mutates state".into())
                } else {
                    Ok(())
                }
            }
            // Reject everything else — `python -c` and `python script.py` execute arbitrary code.
            Some(_) => Err(format!(
                "`{program}` runs arbitrary code via shell_read — use shell_exec or wrap in a Rust tool"
            )),
            None => Err(format!("`{program}` needs a subcommand")),
        },
        "node" | "deno" => match sub {
            Some("--version" | "-v") => Ok(()),
            _ => Err(format!(
                "`{program}` evaluates arbitrary code — use shell_exec"
            )),
        },
        "go" => match sub {
            // `go env -w` / `-u` persist changes to the user's Go environment file.
            Some("env") if rest.iter().any(|a| sets_go_env(a)) => {
                Err("`go env -w/-u` mutates state; use shell_exec".into())
            }
            Some("version" | "env" | "list" | "vet" | "doc" | "fmt" | "mod") => {
                let edits_module = sub == Some("mod")
                    && rest.iter().any(|a| {
                        matches!(a.as_str(), "init" | "tidy" | "edit" | "download" | "vendor")
                    });
                if edits_module {
                    Err("`go mod init/tidy/...` mutates state".into())
                } else {
                    Ok(())
                }
            }
            Some(s @ ("test" | "build" | "run" | "install" | "get" | "generate")) => {
                Err(format!("`go {s}` builds/installs; use shell_exec"))
            }
            Some(s) => Err(format!("`go {s}` not in shell_read allowlist")),
            None => Err("go needs a subcommand".into()),
        },
        "make" => match sub {
            Some("--version" | "-n" | "--dry-run") => Ok(()),
            _ => Err("`make` runs arbitrary targets — use shell_exec".into()),
        },
        "docker" | "podman" | "kubectl" => match sub {
            // Read-only container/k8s inspection.
            Some(
                "ps" | "images" | "version" | "info" | "history" | "inspect" | "logs" | "stats"
                | "top" | "port" | "diff" | "search",
            ) => Ok(()),
            // Allowlist the query forms of `kubectl config`; everything else edits kubeconfig.
            Some("config") if program == "kubectl" => match first_positional(rest) {
                None
                | Some(
                    "view" | "current-context" | "get-contexts" | "get-clusters" | "get-users",
                ) => Ok(()),
                Some(op) => Err(format!(
                    "`kubectl config {op}` is not a read-only config command; use shell_exec"
                )),
            },
            Some(
                "get" | "describe" | "explain" | "api-resources" | "api-versions"
                | "cluster-info" | "events",
            ) if program == "kubectl" => Ok(()),
            Some(s) => Err(format!("`{program} {s}` not in shell_read allowlist")),
            None => Err(format!("`{program}` needs a subcommand")),
        },
        // `env <cmd> …` runs <cmd>, so only the bare listing forms are reads.
        "env" => {
            if args
                .iter()
                .all(|a| matches!(a.as_str(), "-0" | "--null" | "--version" | "--help"))
            {
                Ok(())
            } else {
                Err("`env` with arguments can launch arbitrary programs — use shell_exec".into())
            }
        }
        // `rustup run <toolchain> <cmd>` executes <cmd>; install/default/update mutate toolchains.
        "rustup" => match sub {
            None | Some("show" | "which" | "check" | "--version" | "-V" | "--help" | "-h") => {
                Ok(())
            }
            Some("toolchain" | "target" | "component" | "override")
                if rest.first().map(String::as_str) == Some("list") =>
            {
                Ok(())
            }
            Some(s) => Err(format!(
                "`rustup {s}` is not a read-only query — use shell_exec"
            )),
        },
        "rustc" => check_rustc_query(args),
        // Plain read commands — refuse exec hand-offs and output redirection flags.
        "ls" | "pwd" | "rg" | "fd" | "wc" | "head" | "tail" | "cat" | "grep" | "tree" | "stat"
        | "file" | "du" | "df" | "ps" | "uname" | "hostname" | "date" | "which" | "whereis" => {
            // Block xargs-style execution hand-offs (`--exec` also contains `-exec`).
            if args.iter().any(|a| a.contains("-exec")) {
                Err(format!(
                    "`{program}` with -exec is not allowed via shell_read"
                ))
            } else {
                check_tool_flags(program, args)
            }
        }
        "find" => {
            // `find -exec`, `-delete`, `-fprint*`, `-fls` are write-equivalent.
            let write_action = args.iter().map(String::as_str).find(|a| {
                matches!(
                    *a,
                    "-exec"
                        | "-execdir"
                        | "-delete"
                        | "-fprint"
                        | "-fprint0"
                        | "-fprintf"
                        | "-fls"
                        | "-ok"
                        | "-okdir"
                )
            });
            match write_action {
                Some(action) => Err(format!("`find {action}` mutates state; use shell_exec")),
                None => Ok(()),
            }
        }
        other => Err(format!("`{other}` is not in the read program allowlist")),
    }
}

/// Returns the first argument that does not start with `-`, if any.
fn first_positional(args: &[String]) -> Option<&str> {
    args.iter()
        .map(String::as_str)
        .find(|a| !a.starts_with('-'))
}

/// True when `arg` is the `-w`/`-u` flag of `go env` (Go accepts one or two dashes).
fn sets_go_env(arg: &str) -> bool {
    let name = arg.trim_start_matches('-');
    arg.starts_with('-') && matches!(name.split('=').next(), Some("w" | "u"))
}

/// Accepts `git config` only in its query forms.
///
/// Flags are allowlisted because write actions (`--add`, `--unset`, `--edit`, …)
/// and scope flags (`--global`, `-f <file>`) would otherwise let a write slip
/// past a simple "is the second argument a flag" test.
fn check_git_config(rest: &[String]) -> Result<(), String> {
    const READ_ACTIONS: &[&str] = &[
        "-l",
        "--list",
        "--get",
        "--get-all",
        "--get-regexp",
        "--get-urlmatch",
    ];
    const MODIFIERS: &[&str] = &[
        "--global",
        "--local",
        "--system",
        "--worktree",
        "-z",
        "--null",
        "--name-only",
        "--show-origin",
        "--show-scope",
        "--includes",
        "--no-includes",
        "--bool",
        "--int",
        "--bool-or-int",
        "--path",
        "--expiry-date",
        "--fixed-value",
        "--all",
        "--regexp",
    ];
    // Flags whose value is the following argument (or attached with `=`).
    const VALUE_FLAGS: &[&str] = &[
        "-f",
        "--file",
        "--blob",
        "--type",
        "--default",
        "--url",
        "--value",
    ];

    let mut explicit_read = false;
    let mut positionals: Vec<&str> = Vec::new();
    let mut it = rest.iter().map(String::as_str);
    while let Some(arg) = it.next() {
        if !arg.starts_with('-') {
            positionals.push(arg);
            continue;
        }
        if READ_ACTIONS.contains(&arg) {
            explicit_read = true;
            continue;
        }
        if VALUE_FLAGS.contains(&arg) {
            // The next argument is this flag's value, not a key or a value to store.
            let _ = it.next();
            continue;
        }
        let attached_value = arg
            .split_once('=')
            .map_or(false, |(name, _)| VALUE_FLAGS.contains(&name));
        if MODIFIERS.contains(&arg) || attached_value {
            continue;
        }
        return Err(format!(
            "`git config {arg}` is not a known read-only flag — use shell_exec"
        ));
    }

    match positionals.first().copied() {
        // Subcommand-style writes of newer git releases.
        Some(op @ ("set" | "unset" | "rename-section" | "remove-section" | "edit")) => {
            Err(format!("`git config {op}` is a write — use shell_exec"))
        }
        Some("get" | "list") => Ok(()),
        _ if explicit_read => Ok(()),
        // Reject `git config <key> <value>` (write) — accept `git config <key>` (read).
        _ if positionals.len() >= 2 => {
            Err("`git config <k> <v>` is a write — use shell_exec".into())
        }
        _ => Ok(()),
    }
}

/// Accepts `git branch` / `git tag` only when they list refs.
///
/// A bare positional creates a ref, so positionals are allowed only together
/// with `-l`/`--list` (where they are patterns). Flags are allowlisted so that
/// delete/move/force switches are rejected.
fn check_git_ref_listing(sub: &str, rest: &[String]) -> Result<(), String> {
    // Filters whose value may follow as a separate argument.
    const VALUE_FLAGS: &[&str] = &[
        "--contains",
        "--no-contains",
        "--merged",
        "--no-merged",
        "--points-at",
        "--sort",
        "--format",
    ];
    // Flags accepted only in their `--flag=value` spelling.
    const ATTACHED_ONLY: &[&str] = &["--color", "--column", "--abbrev"];
    const SHARED: &[&str] = &[
        "-i",
        "--ignore-case",
        "--color",
        "--no-color",
        "--column",
        "--no-column",
        "--omit-empty",
    ];
    const BRANCH_ONLY: &[&str] = &[
        "-a",
        "--all",
        "-r",
        "--remotes",
        "-v",
        "-vv",
        "--verbose",
        "--show-current",
        "--no-abbrev",
    ];

    let mut list_mode = false;
    let mut has_positional = false;
    let mut it = rest.iter().map(String::as_str).peekable();
    while let Some(arg) = it.next() {
        if !arg.starts_with('-') {
            has_positional = true;
            continue;
        }
        if matches!(arg, "-l" | "--list") {
            list_mode = true;
            continue;
        }
        let (name, attached) = match arg.split_once('=') {
            Some((flag, _)) => (flag, true),
            None => (arg, false),
        };
        if VALUE_FLAGS.contains(&name) {
            // Only swallow a following value that cannot itself be a flag, so
            // every dash-prefixed argument is still validated.
            if !attached && it.peek().map_or(false, |next| !next.starts_with('-')) {
                let _ = it.next();
            }
            continue;
        }
        // `git tag -n[<num>]` prints annotation lines.
        let tag_lines = sub == "tag"
            && arg
                .strip_prefix("-n")
                .map_or(false, |digits| digits.chars().all(|c| c.is_ascii_digit()));
        let known = SHARED.contains(&arg)
            || (attached && ATTACHED_ONLY.contains(&name))
            || (sub == "branch" && BRANCH_ONLY.contains(&arg))
            || tag_lines;
        if !known {
            return Err(format!(
                "`git {sub} {arg}` is not a known read-only flag — use shell_exec"
            ));
        }
    }

    if has_positional && !list_mode {
        Err(format!(
            "`git {sub} <name>` creates or changes a ref — pass --list to filter, or use shell_exec"
        ))
    } else {
        Ok(())
    }
}

/// Accepts `git remote` only for listing, `show` and `get-url`.
fn check_git_remote(rest: &[String]) -> Result<(), String> {
    let unknown_flag = rest.iter().map(String::as_str).find(|a| {
        a.starts_with('-') && !matches!(*a, "-v" | "--verbose" | "-n" | "--push" | "--all")
    });
    if let Some(flag) = unknown_flag {
        return Err(format!(
            "`git remote {flag}` is not a known read-only flag — use shell_exec"
        ));
    }
    match first_positional(rest) {
        None | Some("show" | "get-url") => Ok(()),
        Some(op) => Err(format!("`git remote {op}` is a write — use shell_exec")),
    }
}

/// Accepts `rustc` only for version / `--print` / `--explain` queries.
///
/// Any other argument is rejected because an input file makes `rustc` compile
/// and write artifacts, and codegen options can select the linker it runs.
fn check_rustc_query(args: &[String]) -> Result<(), String> {
    let mut it = args.iter().map(String::as_str);
    while let Some(arg) = it.next() {
        let allowed = match arg {
            "--version" | "-V" | "-vV" | "-Vv" | "-v" | "--verbose" | "--help" | "-h" => true,
            // These take exactly one value, which is consumed here.
            "--explain" | "--target" => it.next().is_some(),
            // A `KIND=PATH` print request would redirect output into a file.
            "--print" => matches!(it.next(), Some(kind) if !kind.contains('=')),
            _ => match arg.split_once('=') {
                Some(("--print", kind)) => !kind.contains('='),
                Some(("--target" | "--explain", _)) => true,
                _ => false,
            },
        };
        if !allowed {
            return Err(
                "`rustc` is limited to --version/--print/--explain queries via shell_read — use shell_exec to compile"
                    .into(),
            );
        }
    }
    Ok(())
}

/// Extra per-tool flag checks for the otherwise unrestricted read commands.
///
/// Covers flags that run another program (`fd -x/-X`, `rg --pre`,
/// `rg --hostname-bin`) or write a file (`tree -o`).
fn check_tool_flags(program: &str, args: &[String]) -> Result<(), String> {
    /// True for a single-dash flag cluster containing any of `wanted`.
    fn short_cluster_has(arg: &str, wanted: &[char]) -> bool {
        arg.starts_with('-') && !arg.starts_with("--") && arg.contains(wanted)
    }

    let offending = args.iter().map(String::as_str).find(|arg| match program {
        "fd" => short_cluster_has(arg, &['x', 'X']),
        "rg" => {
            matches!(*arg, "--pre" | "--hostname-bin")
                || arg.starts_with("--pre=")
                || arg.starts_with("--hostname-bin=")
        }
        "tree" => {
            short_cluster_has(arg, &['o']) || *arg == "--output" || arg.starts_with("--output=")
        }
        _ => false,
    });
    match offending {
        Some(flag) => Err(format!(
            "`{program} {flag}` can run programs or write files — use shell_exec"
        )),
        None => Ok(()),
    }
}
195
/// Program names `ShellRead` advertises in its schema description.
/// Being listed is not sufficient: arguments are still checked per program.
const READ_PROGRAMS: &[&str] = &[
    // Rust / VCS / filesystem inspection
    "cargo",
    "git",
    "ls",
    "pwd",
    "rustc",
    "rustup",
    "rg",
    "fd",
    "wc",
    "find",
    "head",
    "tail",
    "cat",
    "grep",
    "tree",
    "stat",
    "file",
    "du",
    "df",
    // System information
    "ps",
    "uname",
    "hostname",
    "date",
    "env",
    "which",
    "whereis",
    // JavaScript package managers
    "npm",
    "pnpm",
    "yarn",
    "bun",
    // Python tooling
    "python",
    "python3",
    "uv",
    "pip",
    "pip3",
    // Other runtimes and build tools
    "node",
    "deno",
    "go",
    "make",
    // Containers / Kubernetes
    "docker",
    "podman",
    "kubectl",
];
203
204static SHELL_READ_SCHEMA: Lazy<ToolSchema> = Lazy::new(|| ToolSchema {
205    name: "shell_read".into(),
206    description: format!(
207        "Run a read-only program. Allowed programs: {}. Each program has a \
208         curated allowlist of safe subcommands (cargo check/test/clippy/fmt; \
209         git status/log/diff/blame; etc.). Write-equivalents like \
210         `cargo install`, `git config <k> <v>`, `find -exec/-delete` are rejected.",
211        READ_PROGRAMS.join(", ")
212    ),
213    input: json!({
214        "type": "object",
215        "properties": {
216            "program": {"type": "string"},
217            "args":    {"type": "array", "items": {"type": "string"}},
218            "cwd":     {"type": "string", "description": "Path relative to workspace root"}
219        },
220        "required": ["program"]
221    }),
222});
223
224#[async_trait]
225impl Tool for ShellRead {
226    fn name(&self) -> &str {
227        "shell_read"
228    }
229    fn schema(&self) -> &ToolSchema {
230        &SHELL_READ_SCHEMA
231    }
232    fn risk(&self) -> ToolRisk {
233        ToolRisk::ReadOnly
234    }
235
236    async fn invoke(
237        &self,
238        args: serde_json::Value,
239        world: &mut World,
240    ) -> Result<ToolResult, ToolError> {
241        let a: ShellArgs = serde_json::from_value(args).map_err(|e| ToolError::InvalidArgs {
242            name: self.name().into(),
243            reason: e.to_string(),
244        })?;
245        check_safe_args(&a.program, &a.args).map_err(ToolError::Permission)?;
246        run(&a, world).await
247    }
248}
249
250// ---------- ShellExec (destructive) ----------
251
/// Tool that runs any program with no allowlist check.
/// Reports `ToolRisk::Destructive`.
#[derive(Debug, Clone, Copy, Default)]
pub struct ShellExec;
253
254static SHELL_EXEC_SCHEMA: Lazy<ToolSchema> = Lazy::new(|| ToolSchema {
255    name: "shell_exec".into(),
256    description: "Run an arbitrary command in the workspace. Destructive — use sparingly. \
257                  Returns stdout/stderr/status."
258        .into(),
259    input: json!({
260        "type": "object",
261        "properties": {
262            "program": {"type": "string"},
263            "args":    {"type": "array", "items": {"type": "string"}},
264            "cwd":     {"type": "string"}
265        },
266        "required": ["program"]
267    }),
268});
269
270#[async_trait]
271impl Tool for ShellExec {
272    fn name(&self) -> &str {
273        "shell_exec"
274    }
275    fn schema(&self) -> &ToolSchema {
276        &SHELL_EXEC_SCHEMA
277    }
278    fn risk(&self) -> ToolRisk {
279        ToolRisk::Destructive
280    }
281
282    async fn invoke(
283        &self,
284        args: serde_json::Value,
285        world: &mut World,
286    ) -> Result<ToolResult, ToolError> {
287        let a: ShellArgs = serde_json::from_value(args).map_err(|e| ToolError::InvalidArgs {
288            name: self.name().into(),
289            reason: e.to_string(),
290        })?;
291        run(&a, world).await
292    }
293}
294
295// ---------- shared dispatch ----------
296
297async fn run(a: &ShellArgs, world: &mut World) -> Result<ToolResult, ToolError> {
298    let args_ref: Vec<&str> = a.args.iter().map(String::as_str).collect();
299    let cwd_buf;
300    let cwd = if let Some(c) = &a.cwd {
301        cwd_buf = world.repo.root.join(c);
302        Some(cwd_buf.as_path())
303    } else {
304        Some(world.repo.root.as_path())
305    };
306
307    let out = world
308        .runner
309        .exec(&a.program, &args_ref, cwd)
310        .await
311        .map_err(|e| ToolError::Exec(format!("spawn `{}`: {e}", a.program)))?;
312
313    // Truncate giant output so the model isn't drowned. Keep first 80 lines + last 40.
314    let stdout = clip_for_model(&out.stdout);
315    let stderr = clip_for_model(&out.stderr);
316
317    Ok(ToolResult {
318        ok: out.status == 0,
319        content: json!({
320            "status": out.status,
321            "stdout": stdout,
322            "stderr": stderr,
323        }),
324        trace: None,
325    })
326}
327
/// Shortens long output for the model.
///
/// Input of at most 120 lines is returned unchanged. Longer input is reduced
/// to its first 80 and last 40 lines, joined with `\n`, with a marker line in
/// between stating how many lines were dropped.
fn clip_for_model(s: &str) -> String {
    const HEAD: usize = 80;
    const TAIL: usize = 40;

    let all: Vec<&str> = s.lines().collect();
    let total = all.len();
    if total <= HEAD + TAIL {
        return s.to_owned();
    }

    let front = &all[..HEAD];
    let back = &all[total - TAIL..];
    let dropped = total - (HEAD + TAIL);

    let mut clipped = front.join("\n");
    clipped.push_str(&format!("\n... [{} lines clipped] ...\n", dropped));
    clipped.push_str(&back.join("\n"));
    clipped
}
354
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the matcher on borrowed string parts.
    fn check(program: &str, parts: &[&str]) -> Result<(), String> {
        let owned: Vec<String> = parts.iter().map(|part| (*part).to_owned()).collect();
        check_safe_args(program, &owned)
    }

    #[test]
    fn cargo_check_is_safe() {
        let accepted: [&[&str]; 3] = [
            &["check"],
            &["test", "--all"],
            &["clippy", "--", "-D", "warnings"],
        ];
        for parts in accepted {
            assert!(check("cargo", parts).is_ok(), "cargo {parts:?}");
        }
    }

    #[test]
    fn cargo_install_blocked() {
        let rejected: [&[&str]; 3] = [&["install", "ripgrep"], &["publish"], &["yank", "0.1.0"]];
        for parts in rejected {
            assert!(check("cargo", parts).is_err(), "cargo {parts:?}");
        }
    }

    #[test]
    fn git_config_read_vs_write() {
        // Reading a single key is fine.
        assert!(check("git", &["config", "user.email"]).is_ok());
        // Key plus value is a write and must be refused.
        assert!(check("git", &["config", "user.email", "evil@x"]).is_err());
        // Flag-prefixed forms such as `--list` are accepted.
        assert!(check("git", &["config", "--list"]).is_ok());
    }

    #[test]
    fn find_exec_blocked() {
        let with_exec = [".", "-name", "*.rs", "-exec", "rm", "{}", ";"];
        assert!(check("find", &with_exec).is_err());
        assert!(check("find", &[".", "-name", "*.rs"]).is_ok());
    }

    #[test]
    fn unknown_program_blocked() {
        assert!(check("sudo", &["rm", "-rf", "/"]).is_err());
        assert!(check("curl", &["evil.com"]).is_err());
    }
}
404}