rustyclaw_core/tools/
helpers.rs

1//! Helper functions and global state for the tools system.
2
3use crate::process_manager::{ProcessManager, SharedProcessManager};
4use crate::sandbox::{Sandbox, SandboxMode, SandboxPolicy};
5use std::path::{Path, PathBuf};
6use std::sync::{Arc, Mutex, OnceLock};
7use tracing::{debug, warn};
8
9// ── Global process manager ──────────────────────────────────────────────────
10
11/// Global process manager for background exec sessions.
12static PROCESS_MANAGER: OnceLock<SharedProcessManager> = OnceLock::new();
13
14/// Get the global process manager instance.
15pub fn process_manager() -> &'static SharedProcessManager {
16    PROCESS_MANAGER.get_or_init(|| Arc::new(Mutex::new(ProcessManager::new())))
17}
18
19// ── Global sandbox configuration ────────────────────────────────────────────
20
21/// Global sandbox instance, initialized once at gateway startup.
22static SANDBOX: OnceLock<Sandbox> = OnceLock::new();
23
24/// Called once from the gateway to initialize the sandbox.
25pub fn init_sandbox(mode: SandboxMode, workspace: PathBuf, credentials_dir: PathBuf, deny_paths: Vec<PathBuf>) {
26    debug!(?mode, ?workspace, "Initializing sandbox");
27    let mut policy = SandboxPolicy::protect_credentials(&credentials_dir, &workspace);
28    for path in deny_paths {
29        policy = policy.deny_read(path.clone()).deny_write(path);
30    }
31    let sandbox = Sandbox::with_mode(mode, policy);
32    let _ = SANDBOX.set(sandbox);
33}
34
35/// Get the global sandbox instance, if initialized.
36pub fn sandbox() -> Option<&'static Sandbox> {
37    SANDBOX.get()
38}
39
40/// Run a command through the sandbox (or unsandboxed if not initialized).
41pub fn run_sandboxed_command(command: &str, cwd: &Path) -> Result<std::process::Output, String> {
42    if let Some(sb) = SANDBOX.get() {
43        debug!(mode = ?sb.mode, cwd = %cwd.display(), "Running sandboxed command");
44        // Update policy workspace to the actual cwd for this command
45        let mut policy = sb.policy.clone();
46        policy.workspace = cwd.to_path_buf();
47        crate::sandbox::run_sandboxed(command, &policy, sb.mode)
48    } else {
49        debug!(cwd = %cwd.display(), "Running unsandboxed command (no sandbox configured)");
50        // No sandbox configured, run directly
51        std::process::Command::new("sh")
52            .arg("-c")
53            .arg(command)
54            .current_dir(cwd)
55            .output()
56            .map_err(|e| format!("Command failed: {}", e))
57    }
58}
59
60// ── Credentials directory protection ────────────────────────────────────────
61
/// Location of the credentials directory.  Written at most once and read by
/// the credential-protection checks in this module.
static CREDENTIALS_DIR: OnceLock<PathBuf> = OnceLock::new();
64
65// ── Global vault for cookie jar access ──────────────────────────────────────
66
67use crate::secrets::SecretsManager;
68
69/// Shared vault type for thread-safe access (uses tokio::sync::Mutex for async).
70pub type SharedVault = Arc<tokio::sync::Mutex<SecretsManager>>;
71
72/// Global vault instance, set once at gateway startup.
73static VAULT: OnceLock<SharedVault> = OnceLock::new();
74
75/// Called once from the gateway to register the vault for tool access.
76pub fn set_vault(vault: SharedVault) {
77    let _ = VAULT.set(vault);
78}
79
80/// Get the global vault instance, if initialized.
81pub fn vault() -> Option<&'static SharedVault> {
82    VAULT.get()
83}
84
85/// Called once from the gateway to register the credentials path.
86pub fn set_credentials_dir(path: PathBuf) {
87    let _ = CREDENTIALS_DIR.set(path);
88}
89
90/// Returns `true` when a command string references the credentials directory.
91pub fn command_references_credentials(command: &str) -> bool {
92    if let Some(cred_dir) = CREDENTIALS_DIR.get() {
93        let cred_str = cred_dir.to_string_lossy();
94        command.contains(cred_str.as_ref())
95    } else {
96        false
97    }
98}
99
100/// Returns `true` when `path` falls inside the credentials directory.
101pub fn is_protected_path(path: &Path) -> bool {
102    if let Some(cred_dir) = CREDENTIALS_DIR.get() {
103        // Canonicalise both so symlinks / ".." can't bypass the check.
104        let canon_cred = match cred_dir.canonicalize() {
105            Ok(p) => p,
106            Err(_) => return false, // dir doesn't exist yet – nothing to protect
107        };
108        let canon_path = match path.canonicalize() {
109            Ok(p) => p,
110            Err(_) => {
111                // File may not exist yet (write_file).  Fall back to
112                // starts_with on the raw absolute path.
113                return path.starts_with(cred_dir);
114            }
115        };
116        canon_path.starts_with(&canon_cred)
117    } else {
118        false
119    }
120}
121
/// Message returned to a tool that attempts to touch the credentials
/// directory directly.
pub const VAULT_ACCESS_DENIED: &str = "Access denied: the credentials directory is protected. \
     Use the secrets_list / secrets_get / secrets_store tools instead.";
125
126// ── Path helpers ────────────────────────────────────────────────────────────
127
/// Turn a path argument into a concrete path.
///
/// A relative `path` is interpreted against `workspace_dir`; an absolute
/// `path` is returned untouched.
pub fn resolve_path(workspace_dir: &Path, path: &str) -> PathBuf {
    let requested = Path::new(path);
    if !requested.is_absolute() {
        return workspace_dir.join(requested);
    }
    requested.to_path_buf()
}
138
139/// Expand a leading `~` to the user's home directory.
140pub fn expand_tilde(p: &str) -> PathBuf {
141    if p.starts_with('~') {
142        dirs::home_dir()
143            .map(|h| h.join(p.strip_prefix("~/").unwrap_or(&p[1..])))
144            .unwrap_or_else(|| PathBuf::from(p))
145    } else {
146        PathBuf::from(p)
147    }
148}
149
/// Choose the textual form of a path discovered during a search.
///
/// A `found` path located under `workspace_dir` is rendered relative to the
/// workspace, so the model can hand it straight back to tools that resolve
/// paths against `workspace_dir`.  Any other path is rendered exactly as
/// given, so it stays usable with tools that accept absolute paths.
pub fn display_path(found: &Path, workspace_dir: &Path) -> String {
    let shown = found.strip_prefix(workspace_dir).unwrap_or(found);
    shown.display().to_string()
}
163
164/// Filter for `walkdir` — skip common non-content directories.
165pub fn should_visit(entry: &walkdir::DirEntry) -> bool {
166    let name = entry.file_name().to_string_lossy();
167    if entry.file_type().is_dir() {
168        if matches!(
169            name.as_ref(),
170            ".git" | "node_modules" | "target" | ".hg" | ".svn"
171                | "__pycache__" | "dist" | "build"
172        ) {
173            return false;
174        }
175        // Never recurse into the credentials directory.
176        if is_protected_path(entry.path()) {
177            return false;
178        }
179        true
180    } else {
181        true
182    }
183}
184
185// ── Tool output sanitization ────────────────────────────────────────────────
186
/// Upper bound on tool output size before truncation kicks in (50 KB).
const MAX_TOOL_OUTPUT_BYTES: usize = 50_000;

/// Heuristically flag content that looks like an HTML page or encoded binary
/// data rather than useful text.
///
/// The content is flagged when any of these holds:
/// - it contains `<!doctype` or `<html` (case-insensitive);
/// - it contains a `data:image/` or `data:application/` URI prefix;
/// - more than three lines are longer than 500 bytes and contain no space.
fn is_likely_garbage(s: &str) -> bool {
    const HTML_MARKERS: [&str; 2] = ["<!doctype", "<html"];
    const DATA_URI_MARKERS: [&str; 2] = ["data:image/", "data:application/"];

    // HTML markers are matched without regard to case.
    let lowered = s.to_lowercase();
    if HTML_MARKERS.iter().any(|marker| lowered.contains(*marker)) {
        return true;
    }

    // Embedded base64 data URIs.
    if DATA_URI_MARKERS.iter().any(|marker| s.contains(*marker)) {
        return true;
    }

    // Long unbroken lines are typical of base64-like payloads.
    let dense_line_count = s
        .lines()
        .filter(|line| line.len() > 500 && !line.contains(' '))
        .count();
    dense_line_count > 3
}
214
215/// Sanitize tool output: truncate if too large, warn if garbage detected.
216pub fn sanitize_tool_output(output: String) -> String {
217    // Check for garbage content first
218    if is_likely_garbage(&output) {
219        let preview_len = output.len().min(500);
220        let preview: String = output.chars().take(preview_len).collect();
221        warn!(bytes = output.len(), "Tool returned HTML/binary content");
222        return format!(
223            "[Warning: Tool returned HTML/binary content ({} bytes) — likely not useful]\n\nPreview:\n{}...",
224            output.len(),
225            preview
226        );
227    }
228    
229    // Truncate if too large
230    if output.len() > MAX_TOOL_OUTPUT_BYTES {
231        debug!(bytes = output.len(), max = MAX_TOOL_OUTPUT_BYTES, "Truncating large tool output");
232        let truncated: String = output.chars().take(MAX_TOOL_OUTPUT_BYTES).collect();
233        format!(
234            "{}...\n\n[Truncated: {} bytes total, showing first {}]",
235            truncated,
236            output.len(),
237            MAX_TOOL_OUTPUT_BYTES
238        )
239    } else {
240        output
241    }
242}
rustyclaw_core/tools/helpers.rs

rustyclaw_core/tools/
helpers.rs