Skip to main content

orcs_lua/
sanitize.rs

//! Input sanitization and structured execution for Lua.
//!
//! Provides Rust-implemented sanitization primitives exposed to Lua,
//! powered by the [`shell_sanitize`] crate. Lua script authors use
//! these to validate arguments before passing them to execution APIs.
//!
//! # Lua API
//!
//! | Function | Preset | Use case |
//! |----------|--------|----------|
//! | `orcs.sanitize_arg(s)` | `command_arg` | model name, session_id, flags |
//! | `orcs.sanitize_path(s)` | `file_path` | relative paths within sandbox |
//! | `orcs.sanitize_strict(s)` | `strict` | values reaching a shell |
//!
//! Each returns a table: `{ok = true, value = ...}` when the input is accepted,
//! or `{ok = false, error = ..., violations = {...}}` when it is rejected.
//!
//! # Structured Execution
//!
//! | Function | Description |
//! |----------|-------------|
//! | `orcs.exec_argv(program, args [, opts])` | Shell-free execution via `Command::new` |
//!
//! `exec_argv` bypasses the shell entirely. Arguments are passed directly
//! to the OS exec layer, making shell injection structurally impossible.
//!
//! # Example
//!
//! ```lua
//! local check = orcs.sanitize_arg(opts.model)
//! if not check.ok then return { ok = false, error = check.error } end
//!
//! local result = orcs.llm(prompt, {
//!     model = check.value,
//! })
//! ```
36
37use mlua::{Lua, Table};
38use shell_sanitize_rules::presets;
39
40/// Registers sanitization functions into the `orcs` Lua table.
41///
42/// Adds:
43/// - `orcs.sanitize_arg(s)` — command_arg preset (ControlChar only)
44/// - `orcs.sanitize_path(s)` — file_path preset (PathTraversal + ControlChar)
45/// - `orcs.sanitize_strict(s)` — strict preset (all 5 rules)
46///
47/// These are always available (no Capability gate — they are pure validation).
48pub fn register_sanitize_functions(lua: &Lua, orcs_table: &Table) -> Result<(), mlua::Error> {
49    // orcs.sanitize_arg(s) -> {ok, value, error, violations}
50    let sanitize_arg_fn = lua.create_function(|lua, input: String| {
51        let sanitizer = presets::command_arg();
52        sanitize_to_lua(lua, &sanitizer, &input)
53    })?;
54    orcs_table.set("sanitize_arg", sanitize_arg_fn)?;
55
56    // orcs.sanitize_path(s) -> {ok, value, error, violations}
57    let sanitize_path_fn = lua.create_function(|lua, input: String| {
58        let sanitizer = presets::file_path();
59        sanitize_to_lua(lua, &sanitizer, &input)
60    })?;
61    orcs_table.set("sanitize_path", sanitize_path_fn)?;
62
63    // orcs.sanitize_strict(s) -> {ok, value, error, violations}
64    let sanitize_strict_fn = lua.create_function(|lua, input: String| {
65        let sanitizer = presets::strict();
66        sanitize_to_lua(lua, &sanitizer, &input)
67    })?;
68    orcs_table.set("sanitize_strict", sanitize_strict_fn)?;
69
70    Ok(())
71}
72
73/// Runs a sanitizer and converts the result to a Lua table.
74///
75/// On success: `{ok=true, value="sanitized_string"}`
76/// On failure: `{ok=false, error="human-readable", violations={...}}`
77fn sanitize_to_lua<T: shell_sanitize::MarkerType>(
78    lua: &Lua,
79    sanitizer: &shell_sanitize::Sanitizer<T>,
80    input: &str,
81) -> Result<Table, mlua::Error> {
82    let result = lua.create_table()?;
83
84    match sanitizer.sanitize(input) {
85        Ok(sanitized) => {
86            result.set("ok", true)?;
87            result.set("value", sanitized.as_str())?;
88        }
89        Err(err) => {
90            result.set("ok", false)?;
91            result.set("error", err.to_string())?;
92
93            // Structured violations array for programmatic access
94            let violations_table = lua.create_table()?;
95            for (i, v) in err.violations.iter().enumerate() {
96                let entry = lua.create_table()?;
97                entry.set("rule", v.rule_name)?;
98                entry.set("message", v.message.as_str())?;
99                if let Some(pos) = v.position {
100                    entry.set("position", pos)?;
101                }
102                if let Some(ref frag) = v.fragment {
103                    entry.set("fragment", frag.as_str())?;
104                }
105                violations_table.set(i + 1, entry)?; // Lua 1-indexed
106            }
107            result.set("violations", violations_table)?;
108        }
109    }
110
111    Ok(result)
112}
113
114/// Registers `orcs.exec_argv` (base version, deny-by-default).
115///
116/// This stub always denies execution. The permission-checked override
117/// is registered by `register_exec_argv_with_context` when a ChildContext
118/// is available.
119pub fn register_exec_argv_deny(lua: &Lua, orcs_table: &Table) -> Result<(), mlua::Error> {
120    let exec_argv_fn = lua.create_function(|lua, (_program, _args): (String, Table)| {
121        let result = lua.create_table()?;
122        result.set("ok", false)?;
123        result.set("stdout", "")?;
124        result.set(
125            "stderr",
126            "exec_argv denied: no execution context (ChildContext required)",
127        )?;
128        result.set("code", -1)?;
129        Ok(result)
130    })?;
131    orcs_table.set("exec_argv", exec_argv_fn)?;
132    Ok(())
133}
134
135/// Builds and executes a `Command` from program + args + opts.
136///
137/// Shared implementation used by both `child.rs` and `ctx_fns.rs`
138/// after capability/permission checks have passed.
139///
140/// # Arguments
141///
142/// * `program` - Executable name or path
143/// * `args` - Lua table (array) of string arguments
144/// * `opts` - Optional Lua table: `{ env_remove = {"VAR1", ...}, cwd = "path" }`
145/// * `default_cwd` - Fallback working directory (sandbox root)
146pub fn exec_argv_impl(
147    lua: &Lua,
148    program: &str,
149    args: &Table,
150    opts: Option<&Table>,
151    default_cwd: &std::path::Path,
152) -> Result<Table, mlua::Error> {
153    // Collect args from Lua table
154    let mut arg_vec: Vec<String> = Vec::new();
155    let len = args.len()? as usize;
156    for i in 1..=len {
157        let arg: String = args.get(i)?;
158        arg_vec.push(arg);
159    }
160
161    let mut cmd = std::process::Command::new(program);
162    cmd.args(&arg_vec);
163
164    // Handle opts.env_remove
165    if let Some(opts_table) = opts {
166        if let Ok(env_remove) = opts_table.get::<Table>("env_remove") {
167            let env_len = env_remove.len()? as usize;
168            for i in 1..=env_len {
169                if let Ok(var) = env_remove.get::<String>(i) {
170                    cmd.env_remove(&var);
171                }
172            }
173        }
174
175        // Handle opts.cwd
176        if let Ok(cwd) = opts_table.get::<String>("cwd") {
177            cmd.current_dir(&cwd);
178        } else {
179            cmd.current_dir(default_cwd);
180        }
181    } else {
182        cmd.current_dir(default_cwd);
183    }
184
185    let output = cmd
186        .output()
187        .map_err(|e| mlua::Error::ExternalError(std::sync::Arc::new(e)))?;
188
189    let result = lua.create_table()?;
190    result.set("ok", output.status.success())?;
191    result.set(
192        "stdout",
193        String::from_utf8_lossy(&output.stdout).to_string(),
194    )?;
195    result.set(
196        "stderr",
197        String::from_utf8_lossy(&output.stderr).to_string(),
198    )?;
199    match output.status.code() {
200        Some(code) => result.set("code", code)?,
201        None => {
202            result.set("code", mlua::Value::Nil)?;
203            result.set("signal_terminated", true)?;
204        }
205    }
206
207    Ok(result)
208}
209
/// Unit tests for the Lua-facing sanitizers, the deny-by-default
/// `orcs.exec_argv` stub, and `exec_argv_impl`.
///
/// Tests that spawn `echo` or `sh` are compiled only on Unix targets,
/// because they rely on those programs being resolvable executables.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::orcs_helpers::ensure_orcs_table;

    /// Creates a Lua state whose `orcs` table has the sanitize functions
    /// and the deny-by-default `exec_argv` stub registered.
    fn setup_lua() -> (Lua, Table) {
        let lua = Lua::new();
        let orcs = ensure_orcs_table(&lua).expect("create orcs table");
        register_sanitize_functions(&lua, &orcs).expect("register sanitize functions");
        register_exec_argv_deny(&lua, &orcs).expect("register exec_argv deny");
        (lua, orcs)
    }

    // ── sanitize_arg tests ──

    #[test]
    fn sanitize_arg_accepts_clean_string() {
        let (lua, _) = setup_lua();
        let result: Table = lua
            .load(r#"return orcs.sanitize_arg("claude-haiku-4-5-20251001")"#)
            .eval()
            .expect("eval sanitize_arg");
        assert!(result.get::<bool>("ok").expect("get ok"));
        assert_eq!(
            result.get::<String>("value").expect("get value"),
            "claude-haiku-4-5-20251001"
        );
    }

    #[test]
    fn sanitize_arg_rejects_null_byte() {
        let (lua, _) = setup_lua();
        // `\0` is interpreted by Lua (the Rust literal is raw), producing a NUL byte
        let result: Table = lua
            .load(r#"return orcs.sanitize_arg("model\0injected")"#)
            .eval()
            .expect("eval sanitize_arg");
        assert!(!result.get::<bool>("ok").expect("get ok"));
        let error = result.get::<String>("error").expect("get error");
        assert!(
            error.contains("violation"),
            "error should mention violation, got: {}",
            error
        );
    }

    #[test]
    fn sanitize_arg_rejects_control_char() {
        let (lua, _) = setup_lua();
        // \x01 = SOH control character
        let result: Table = lua
            .load(r#"return orcs.sanitize_arg("bad" .. string.char(1) .. "input")"#)
            .eval()
            .expect("eval sanitize_arg");
        assert!(!result.get::<bool>("ok").expect("get ok"));

        // Check violations array
        let violations: Table = result.get("violations").expect("get violations");
        let first: Table = violations.get(1).expect("get first violation");
        let rule = first.get::<String>("rule").expect("get rule name");
        assert_eq!(rule, "control_char");
    }

    #[test]
    fn sanitize_arg_allows_normal_punctuation() {
        let (lua, _) = setup_lua();
        // command_arg preset only checks control chars — shell metacharacters are fine
        // because Command::new().arg() doesn't go through a shell
        let result: Table = lua
            .load(r#"return orcs.sanitize_arg("hello world $VAR 'quoted'")"#)
            .eval()
            .expect("eval sanitize_arg");
        assert!(
            result.get::<bool>("ok").expect("get ok"),
            "command_arg should allow shell metacharacters"
        );
    }

    // ── sanitize_path tests ──

    #[test]
    fn sanitize_path_accepts_relative_path() {
        let (lua, _) = setup_lua();
        let result: Table = lua
            .load(r#"return orcs.sanitize_path("src/lib.rs")"#)
            .eval()
            .expect("eval sanitize_path");
        assert!(result.get::<bool>("ok").expect("get ok"));
        assert_eq!(
            result.get::<String>("value").expect("get value"),
            "src/lib.rs"
        );
    }

    #[test]
    fn sanitize_path_rejects_traversal() {
        let (lua, _) = setup_lua();
        let result: Table = lua
            .load(r#"return orcs.sanitize_path("../../etc/passwd")"#)
            .eval()
            .expect("eval sanitize_path");
        assert!(!result.get::<bool>("ok").expect("get ok"));
        let error = result.get::<String>("error").expect("get error");
        assert!(
            error.contains("violation"),
            "should reject path traversal, got: {}",
            error
        );
    }

    #[test]
    fn sanitize_path_rejects_absolute_path() {
        let (lua, _) = setup_lua();
        let result: Table = lua
            .load(r#"return orcs.sanitize_path("/etc/shadow")"#)
            .eval()
            .expect("eval sanitize_path");
        assert!(!result.get::<bool>("ok").expect("get ok"));
    }

    // ── sanitize_strict tests ──

    #[test]
    fn sanitize_strict_accepts_clean_string() {
        let (lua, _) = setup_lua();
        let result: Table = lua
            .load(r#"return orcs.sanitize_strict("safe-filename.txt")"#)
            .eval()
            .expect("eval sanitize_strict");
        assert!(result.get::<bool>("ok").expect("get ok"));
    }

    #[test]
    fn sanitize_strict_rejects_shell_metachar() {
        let (lua, _) = setup_lua();
        let result: Table = lua
            .load(r#"return orcs.sanitize_strict("file; rm -rf /")"#)
            .eval()
            .expect("eval sanitize_strict");
        assert!(!result.get::<bool>("ok").expect("get ok"));
    }

    #[test]
    fn sanitize_strict_rejects_env_expansion() {
        let (lua, _) = setup_lua();
        let result: Table = lua
            .load(r#"return orcs.sanitize_strict("$HOME/.ssh/id_rsa")"#)
            .eval()
            .expect("eval sanitize_strict");
        assert!(!result.get::<bool>("ok").expect("get ok"));
    }

    // ── exec_argv deny-by-default tests ──

    #[test]
    fn exec_argv_denied_without_context() {
        let (lua, _) = setup_lua();
        let result: Table = lua
            .load(r#"return orcs.exec_argv("echo", {"hello"})"#)
            .eval()
            .expect("eval exec_argv");
        assert!(!result.get::<bool>("ok").expect("get ok"));
        let stderr = result.get::<String>("stderr").expect("get stderr");
        assert!(
            stderr.contains("exec_argv denied"),
            "should be denied, got: {}",
            stderr
        );
    }

    // ── exec_argv_impl tests ──

    // Spawns `echo`, so this only makes sense where it is a real executable.
    #[cfg(unix)]
    #[test]
    fn exec_argv_impl_runs_command() {
        let lua = Lua::new();
        let args = lua.create_table().expect("create args table");
        args.set(1, "hello from exec_argv").expect("set arg 1");

        let result = exec_argv_impl(&lua, "echo", &args, None, std::path::Path::new("."))
            .expect("exec_argv_impl should succeed");

        assert!(result.get::<bool>("ok").expect("get ok"));
        let stdout = result.get::<String>("stdout").expect("get stdout");
        assert!(
            stdout.contains("hello from exec_argv"),
            "stdout should contain output, got: {}",
            stdout
        );
    }

    // Spawns `sh` and relies on POSIX `${VAR:-default}` expansion.
    #[cfg(unix)]
    #[test]
    fn exec_argv_impl_with_env_remove() {
        let lua = Lua::new();
        let args = lua.create_table().expect("create args table");
        args.set(1, "-c").expect("set arg");
        args.set(2, "echo ${TEST_SANITIZE_VAR:-unset}")
            .expect("set arg 2");

        let opts = lua.create_table().expect("create opts table");
        let env_remove = lua.create_table().expect("create env_remove");
        env_remove.set(1, "TEST_SANITIZE_VAR").expect("set env var");
        opts.set("env_remove", env_remove).expect("set env_remove");

        // Set the env var first so we can verify it gets removed
        std::env::set_var("TEST_SANITIZE_VAR", "should_be_removed");

        let outcome = exec_argv_impl(&lua, "sh", &args, Some(&opts), std::path::Path::new("."));

        // Clean up before unwrapping: the variable is process-wide, so a
        // failed spawn must not leave it behind for other tests.
        std::env::remove_var("TEST_SANITIZE_VAR");

        let result = outcome.expect("exec_argv_impl should succeed");

        assert!(result.get::<bool>("ok").expect("get ok"));
        let stdout = result.get::<String>("stdout").expect("get stdout");
        assert!(
            stdout.contains("unset"),
            "env var should have been removed, got: {}",
            stdout
        );
    }

    #[test]
    fn exec_argv_impl_nonexistent_program() {
        let lua = Lua::new();
        let args = lua.create_table().expect("create args table");

        let result = exec_argv_impl(
            &lua,
            "nonexistent_program_xyz_12345",
            &args,
            None,
            std::path::Path::new("."),
        );

        assert!(result.is_err(), "should fail for nonexistent program");
    }

    // Spawns `echo`, so this only makes sense where it is a real executable.
    #[cfg(unix)]
    #[test]
    fn exec_argv_impl_no_shell_injection() {
        let lua = Lua::new();
        let args = lua.create_table().expect("create args table");
        // This would be dangerous with sh -c, but exec_argv passes it as a literal arg
        args.set(1, "hello; echo INJECTED").expect("set arg");

        let result = exec_argv_impl(&lua, "echo", &args, None, std::path::Path::new("."))
            .expect("exec_argv_impl should succeed");

        let stdout = result.get::<String>("stdout").expect("get stdout");
        // The semicolon should appear literally in output, not execute a second command
        assert!(
            stdout.contains("hello; echo INJECTED"),
            "should treat semicolon as literal, got: {}",
            stdout
        );
    }
}