rustyclaw_core/tools/
helpers.rs

1//! Helper functions and global state for the tools system.
2
3use crate::process_manager::{ProcessManager, SharedProcessManager};
4use crate::sandbox::{Sandbox, SandboxMode, SandboxPolicy};
5use std::path::{Path, PathBuf};
6use std::sync::{Arc, Mutex, OnceLock};
7use tracing::{debug, warn};
8
9// ── Global process manager ──────────────────────────────────────────────────
10
11/// Global process manager for background exec sessions.
12static PROCESS_MANAGER: OnceLock<SharedProcessManager> = OnceLock::new();
13
14/// Get the global process manager instance.
15pub fn process_manager() -> &'static SharedProcessManager {
16    PROCESS_MANAGER.get_or_init(|| Arc::new(Mutex::new(ProcessManager::new())))
17}
18
19// ── Global sandbox configuration ────────────────────────────────────────────
20
21/// Global sandbox instance, initialized once at gateway startup.
22static SANDBOX: OnceLock<Sandbox> = OnceLock::new();
23
24/// Called once from the gateway to initialize the sandbox.
25pub fn init_sandbox(
26    mode: SandboxMode,
27    workspace: PathBuf,
28    credentials_dir: PathBuf,
29    deny_paths: Vec<PathBuf>,
30) {
31    debug!(?mode, ?workspace, "Initializing sandbox");
32    let mut policy = SandboxPolicy::protect_credentials(&credentials_dir, &workspace);
33    for path in deny_paths {
34        policy = policy.deny_read(path.clone()).deny_write(path);
35    }
36    let sandbox = Sandbox::with_mode(mode, policy);
37    let _ = SANDBOX.set(sandbox);
38}
39
40/// Get the global sandbox instance, if initialized.
41pub fn sandbox() -> Option<&'static Sandbox> {
42    SANDBOX.get()
43}
44
45/// Run a command through the sandbox (or unsandboxed if not initialized).
46pub fn run_sandboxed_command(command: &str, cwd: &Path) -> Result<std::process::Output, String> {
47    if let Some(sb) = SANDBOX.get() {
48        debug!(mode = ?sb.mode, cwd = %cwd.display(), "Running sandboxed command");
49        // Update policy workspace to the actual cwd for this command
50        let mut policy = sb.policy.clone();
51        policy.workspace = cwd.to_path_buf();
52        crate::sandbox::run_sandboxed(command, &policy, sb.mode)
53    } else {
54        debug!(cwd = %cwd.display(), "Running unsandboxed command (no sandbox configured)");
55        // No sandbox configured, run directly
56        std::process::Command::new("sh")
57            .arg("-c")
58            .arg(command)
59            .current_dir(cwd)
60            .output()
61            .map_err(|e| format!("Command failed: {}", e))
62    }
63}
64
65// ── Credentials directory protection ────────────────────────────────────────
66
/// Path of the credentials directory that tools are not allowed to touch
/// (expected to be absolute).
///
/// Empty until a path is stored at gateway startup; the `OnceLock` keeps the
/// first value stored.
static CREDENTIALS_DIR: OnceLock<PathBuf> = OnceLock::new();
69
70// ── Global vault for cookie jar access ──────────────────────────────────────
71
72use crate::secrets::SecretsManager;
73
74/// Shared vault type for thread-safe access (uses tokio::sync::Mutex for async).
75pub type SharedVault = Arc<tokio::sync::Mutex<SecretsManager>>;
76
77/// Global vault instance, set once at gateway startup.
78static VAULT: OnceLock<SharedVault> = OnceLock::new();
79
80/// Called once from the gateway to register the vault for tool access.
81pub fn set_vault(vault: SharedVault) {
82    let _ = VAULT.set(vault);
83}
84
85/// Get the global vault instance, if initialized.
86pub fn vault() -> Option<&'static SharedVault> {
87    VAULT.get()
88}
89
90/// Called once from the gateway to register the credentials path.
91pub fn set_credentials_dir(path: PathBuf) {
92    let _ = CREDENTIALS_DIR.set(path);
93}
94
95/// Returns `true` when a command string references the credentials directory.
96pub fn command_references_credentials(command: &str) -> bool {
97    if let Some(cred_dir) = CREDENTIALS_DIR.get() {
98        let cred_str = cred_dir.to_string_lossy();
99        command.contains(cred_str.as_ref())
100    } else {
101        false
102    }
103}
104
105/// Returns `true` when `path` falls inside the credentials directory.
106pub fn is_protected_path(path: &Path) -> bool {
107    if let Some(cred_dir) = CREDENTIALS_DIR.get() {
108        // Canonicalise both so symlinks / ".." can't bypass the check.
109        let canon_cred = match cred_dir.canonicalize() {
110            Ok(p) => p,
111            Err(_) => return false, // dir doesn't exist yet – nothing to protect
112        };
113        let canon_path = match path.canonicalize() {
114            Ok(p) => p,
115            Err(_) => {
116                // File may not exist yet (write_file).  Fall back to
117                // starts_with on the raw absolute path.
118                return path.starts_with(cred_dir);
119            }
120        };
121        canon_path.starts_with(&canon_cred)
122    } else {
123        false
124    }
125}
126
/// Message returned when a tool attempts to access the protected credentials
/// directory; it points the caller at the secrets tools instead.
pub const VAULT_ACCESS_DENIED: &str =
    "Access denied: the credentials directory is protected. Use the secrets_list / secrets_get / secrets_store tools instead.";
129
130// ── Path helpers ────────────────────────────────────────────────────────────
131
/// Turns a tool-supplied path argument into a concrete path.
///
/// A relative `path` is interpreted relative to `workspace_dir`; an absolute
/// `path` is returned unchanged.
pub fn resolve_path(workspace_dir: &Path, path: &str) -> PathBuf {
    let candidate = Path::new(path);
    if !candidate.is_absolute() {
        return workspace_dir.join(candidate);
    }
    candidate.to_path_buf()
}
142
143/// Expand a leading `~` to the user's home directory.
144pub fn expand_tilde(p: &str) -> PathBuf {
145    if p.starts_with('~') {
146        dirs::home_dir()
147            .map(|h| h.join(p.strip_prefix("~/").unwrap_or(&p[1..])))
148            .unwrap_or_else(|| PathBuf::from(p))
149    } else {
150        PathBuf::from(p)
151    }
152}
153
/// Chooses the textual form of a path discovered during a search.
///
/// A `found` path located under `workspace_dir` is rendered relative to the
/// workspace, so the model can pass it straight back to `read_file` (which
/// resolves it against `workspace_dir` again). Any other path is rendered
/// exactly as given, so it stays usable with tools that accept absolute
/// paths.
pub fn display_path(found: &Path, workspace_dir: &Path) -> String {
    let shown = found.strip_prefix(workspace_dir).unwrap_or(found);
    shown.display().to_string()
}
167
168/// Filter for `walkdir` — skip common non-content directories.
169pub fn should_visit(entry: &walkdir::DirEntry) -> bool {
170    let name = entry.file_name().to_string_lossy();
171    if entry.file_type().is_dir() {
172        if matches!(
173            name.as_ref(),
174            ".git" | "node_modules" | "target" | ".hg" | ".svn" | "__pycache__" | "dist" | "build"
175        ) {
176            return false;
177        }
178        // Never recurse into the credentials directory.
179        if is_protected_path(entry.path()) {
180            return false;
181        }
182        true
183    } else {
184        true
185    }
186}
187
188// ── Tool output sanitization ────────────────────────────────────────────────
189
/// Largest tool output, in bytes, that is passed on without truncation (50 KB).
const MAX_TOOL_OUTPUT_BYTES: usize = 50 * 1_000;
192
/// Heuristically decides whether `s` is HTML or encoded binary data rather
/// than useful text.
///
/// Returns `true` when any of the following holds:
/// * the text contains `<!doctype` or `<html` (case-insensitive);
/// * the text contains a `data:image/` or `data:application/` URI prefix
///   (case-sensitive);
/// * more than three lines are longer than 500 bytes and contain no space,
///   which is typical of base64 blobs.
fn is_likely_garbage(s: &str) -> bool {
    const HTML_MARKERS: [&str; 2] = ["<!doctype", "<html"];
    const DATA_URI_PREFIXES: [&str; 2] = ["data:image/", "data:application/"];

    // HTML markers are matched against a lowercased copy of the text.
    let lowered = s.to_lowercase();
    if HTML_MARKERS.iter().any(|marker| lowered.contains(*marker)) {
        return true;
    }

    // Base64-encoded data URIs.
    if DATA_URI_PREFIXES.iter().any(|prefix| s.contains(*prefix)) {
        return true;
    }

    // Excessive base64-like content: long lines without a single space.
    let dense_lines = s
        .lines()
        .filter(|line| line.len() > 500 && !line.contains(' '))
        .count();
    dense_lines > 3
}
218
219/// Sanitize tool output: truncate if too large, warn if garbage detected.
220pub fn sanitize_tool_output(output: String) -> String {
221    // Check for garbage content first
222    if is_likely_garbage(&output) {
223        let preview_len = output.len().min(500);
224        let preview: String = output.chars().take(preview_len).collect();
225        warn!(bytes = output.len(), "Tool returned HTML/binary content");
226        return format!(
227            "[Warning: Tool returned HTML/binary content ({} bytes) — likely not useful]\n\nPreview:\n{}...",
228            output.len(),
229            preview
230        );
231    }
232
233    // Truncate if too large
234    if output.len() > MAX_TOOL_OUTPUT_BYTES {
235        debug!(
236            bytes = output.len(),
237            max = MAX_TOOL_OUTPUT_BYTES,
238            "Truncating large tool output"
239        );
240        let truncated: String = output.chars().take(MAX_TOOL_OUTPUT_BYTES).collect();
241        format!(
242            "{}...\n\n[Truncated: {} bytes total, showing first {}]",
243            truncated,
244            output.len(),
245            MAX_TOOL_OUTPUT_BYTES
246        )
247    } else {
248        output
249    }
250}
rustyclaw_core/tools/helpers.rs

rustyclaw_core/tools/
helpers.rs