Skip to main content

hematite/tools/
guard.rs

1use super::tool::RiskLevel;
2use std::path::{Path, PathBuf};
3
/// Blacklist of path fragments that must never be touched.
///
/// `path_is_safe` and `bash_is_safe` lower-case each entry, fold `\` to `/`, and
/// reject any path or command that contains the result as a substring. Entries are
/// scanned in declaration order, so the first matching entry is the one reported
/// in the error message.
#[allow(dead_code)]
pub const PROTECTED_FILES: &[&str] = &[
    // --- Windows system locations ---
    r"C:\Windows",
    r"C:\Program Files",
    r"C:\$Recycle.Bin",
    "System Volume Information",
    r"C:\Users\Default",
    // --- Linux / Unix system locations ---
    "/etc",
    "/dev",
    "/proc",
    "/sys",
    "/root",
    "/var/log",
    "/boot",
    // --- Per-user secrets and shell configuration ---
    ".bashrc",
    ".zshrc",
    ".bash_history",
    ".gitconfig",
    ".ssh/",
    ".aws/",
    ".env",
    "credentials.json",
    "auth.json",
    "id_rsa",
    // --- Hematite's own state files ---
    ".mcp.json",
    "hematite_memory.db",
];
35
36/// Enforces the absolute Canonical Traversal lock on the LLM, rendering directory climbing (`../`) obsolete
37/// and blocking any OS-critical reads aggressively by cross-referencing global blacklists.
38#[allow(dead_code)]
39pub fn path_is_safe(workspace_root: &Path, target: &Path) -> Result<PathBuf, String> {
40    // 1) Evaluate target string explicitly normalizing unicode and backslash injection vectors!
41    let mut target_str = target.to_string_lossy().to_string().to_lowercase();
42    target_str = target_str
43        .replace("\\", "/")
44        .replace("\u{005c}", "/")
45        .replace("%5c", "/");
46
47    // Early evaluation covering read-only "Ghosting" on target secrets explicitly
48    for protected in PROTECTED_FILES {
49        let prot_lower = protected.to_lowercase().replace("\\", "/");
50        if target_str.contains(&prot_lower) {
51            return Err(format!(
52                "AccessDenied: Path {} hits the Hematite Security Blacklist natively: {}",
53                target_str, protected
54            ));
55        }
56    }
57
58    // 2) Native Canonicalization - Forcing OS Reality Context over LLM hallucinations
59    let resolved_path = match std::fs::canonicalize(target) {
60        Ok(p) => p,
61        Err(_) => {
62            // If creating a brand new isolated file, physically trace the parent node
63            let parent = target.parent().unwrap_or(Path::new(""));
64            let mut resolved_parent = std::fs::canonicalize(parent)
65                .map_err(|_| "AccessDenied: Invalid directory ancestry inside sandbox root. Path traversing halted!".to_string())?;
66            if let Some(name) = target.file_name() {
67                resolved_parent.push(name);
68            }
69            resolved_parent
70        }
71    };
72
73    // Hard check against hallucinated drive letters that resolved cleanly across symlinks natively
74    let resolved_str = resolved_path
75        .to_string_lossy()
76        .to_string()
77        .to_lowercase()
78        .replace("\\", "/");
79    for protected in PROTECTED_FILES {
80        let prot_lower = protected.to_lowercase().replace("\\", "/");
81        if resolved_str.contains(&prot_lower) {
82            return Err(format!(
83                "AccessDenied: Canonicalized Sandbox resolution natively hits Blacklist bounds: {}",
84                protected
85            ));
86        }
87    }
88
89    let resolved_workspace = std::fs::canonicalize(workspace_root).unwrap_or_default();
90
91    // 3) Assess Physical Traversal Limits strictly against the Root Environment Prefix
92    // Normalize UNC prefixes for Windows compatibility in starts_with checks.
93    let norm_path = resolved_path
94        .to_string_lossy()
95        .trim_start_matches(r"\\?\")
96        .to_lowercase()
97        .replace("\\", "/");
98    let norm_workspace = resolved_workspace
99        .to_string_lossy()
100        .trim_start_matches(r"\\?\")
101        .to_lowercase()
102        .replace("\\", "/");
103
104    if !norm_path.starts_with(&norm_workspace) {
105        // RELAXED SANDBOX: Allow absolute paths IF they passed the blacklist checks above.
106        // Also allow sovereign tokens (@DESKTOP, ~) even if they aren't technically 'absolute' in a Path sense.
107        if target.is_absolute()
108            || target.to_string_lossy().starts_with('@')
109            || target.to_string_lossy().starts_with('~')
110        {
111            return Ok(resolved_path);
112        }
113        return Err(format!("AccessDenied: ⛔ SANDBOX BREACHED ⛔ Attempted directory traversal outside project bounds: {:?}", resolved_path));
114    }
115
116    Ok(resolved_path)
117}
118
119/// Hard-blocks Bash payloads unconditionally if they attempt to reference OS-critical locations
120#[allow(dead_code)]
121pub fn bash_is_safe(cmd: &str) -> Result<(), String> {
122    let lower = cmd
123        .to_lowercase()
124        .replace("\\", "/")
125        .replace("\u{005c}", "/")
126        .replace("%5c", "/");
127    for protected in PROTECTED_FILES {
128        let prot_lower = protected.to_lowercase().replace("\\", "/");
129        if lower.contains(&prot_lower) {
130            return Err(format!("AccessDenied: Bash command structurally attempts to manipulate blacklisted system area: {}", protected));
131        }
132    }
133
134    // Block using shell as a substitute for run_code.
135    // The model should use run_code directly — shell is the wrong tool for this.
136    let sandbox_redirects = [
137        "deno run",
138        "deno --version",
139        "deno -v",
140        "python -c ",
141        "python3 -c ",
142        "node -e ",
143        "node --eval",
144    ];
145    for pattern in sandbox_redirects {
146        if lower.contains(pattern) {
147            return Err(format!(
148                "Use the run_code tool instead of shell for executing {} code. \
149                 Shell is blocked for sandbox-style execution.",
150                pattern.split_whitespace().next().unwrap_or("code")
151            ));
152        }
153    }
154
155    let diagnostic_redirects = [
156        "nvidia-smi",
157        "wmic path win32_videocontroller",
158        "wmic path win32_perfformatteddata_gpu",
159    ];
160    for pattern in diagnostic_redirects {
161        if lower.contains(pattern) {
162            return Err(format!(
163                "Use the inspect_host tool with the relevant topic (e.g., topic=\"overclocker\" or topic=\"hardware\") \
164                 instead of shell for executing {} diagnostics. \
165                 Shell is blocked for raw hardware vitals to ensure high-fidelity bitmask decoding and session-wide history tracking.",
166                pattern.split_whitespace().next().unwrap_or("hardware")
167            ));
168        }
169    }
170
171    Ok(())
172}
173
174/// Three-tier risk classifier for shell commands.
175///
176/// Safe   → auto-approved (read-only, build, test, local git reads)
177/// High   → always requires user approval (destructive, network, privilege)
178/// Moderate → ask by default; can be configured to auto-approve
179pub fn classify_bash_risk(cmd: &str) -> RiskLevel {
180    let lower = cmd.to_lowercase();
181
182    // ── HIGH: destructive / network / privilege ────────────────────────────
183    let high = [
184        // File destruction
185        "rm -",
186        "rm /",
187        "del /",
188        "del /f",
189        "rmdir /s",
190        "remove-item -r",
191        // Network exfiltration
192        "curl ",
193        "wget ",
194        "invoke-webrequest",
195        "invoke-restmethod",
196        "fetch ",
197        // Privilege escalation
198        "sudo ",
199        "runas ",
200        "su -",
201        // Git remote writes
202        "git push",
203        "git force",
204        "git reset --hard",
205        "git clean -f",
206        // System
207        "shutdown",
208        "restart-computer",
209        "taskkill",
210        "format-volume",
211        "diskpart",
212        "format c",
213        "del c:\\",
214        // Secrets
215        ".ssh/",
216        ".aws/",
217        "credentials.json",
218    ];
219    if high.iter().any(|p| lower.contains(p)) {
220        return RiskLevel::High;
221    }
222
223    // ── SAFE: read-only, build, test, local git reads ──────────────────────
224    let safe_prefixes = [
225        "cargo check",
226        "cargo build",
227        "cargo test",
228        "cargo fmt",
229        "cargo clippy",
230        "cargo run",
231        "cargo doc",
232        "cargo tree",
233        "rustc ",
234        "rustfmt ",
235        "git status",
236        "git log",
237        "git diff",
238        "git branch",
239        "git show",
240        "git stash list",
241        "git remote -v",
242        "ls ",
243        "ls\n",
244        "dir ",
245        "dir\n",
246        "echo ",
247        "pwd",
248        "whoami",
249        "cat ",
250        "type ",
251        "head ",
252        "tail ",
253        "get-childitem",
254        "get-content",
255        "get-location",
256        "cargo --version",
257        "rustc --version",
258        "git --version",
259        "node --version",
260        "npm --version",
261        "python --version",
262        // Read-only search and inspection — must never require approval
263        "grep ",
264        "grep\n",
265        "rg ",
266        "rg\n",
267        "find ",
268        "find\n",
269        "select-string",
270        "select-object",
271        "where-object",
272        "sort ",
273        "sort\n",
274        "wc ",
275        "uniq ",
276        "cut ",
277        "file ",
278        "stat ",
279        "du ",
280        "df ",
281        // PowerShell wrapped read-only commands (Select-String, Get-ChildItem inside powershell -Command)
282        "powershell -command \"select-string",
283        "powershell -command \"get-childitem",
284        "powershell -command \"get-content",
285        "powershell -command \"get-counter",
286        "powershell -command 'select-string",
287        "powershell -command 'get-childitem",
288        "powershell -command 'get-counter",
289        "get-counter",
290        "get-item",
291        "test-path",
292        "select-object",
293        "powershell -command \"get-item",
294        "powershell -command \"test-path",
295        "powershell -command \"select-object",
296        "powershell -command 'get-item",
297        "powershell -command 'test-path",
298        "powershell -command 'select-object",
299        "get-smbencryptionstatus",
300        "get-smbshare",
301        "get-smbsession",
302        "get-netlanmanagerconnection",
303        // Scaffold / project init — non-destructive creation commands
304        "npm init",
305        "npm create",
306        "cargo new",
307        "cargo init",
308        "npx create-react-app",
309        "npx create-next-app",
310        "npx create-vue",
311        "npx create-svelte",
312        "npx astro",
313        "pnpm create",
314        "yarn create",
315        "django-admin startproject",
316        "python -m django startproject",
317        "mkdir ",
318        "mkdir\n",
319        "new-item -itemtype directory",
320        "new-item -type directory",
321    ];
322    if safe_prefixes
323        .iter()
324        .any(|p| lower.starts_with(p) || lower == p.trim())
325    {
326        return RiskLevel::Safe;
327    }
328
329    // ── MODERATE: mutation ops that don't destroy data ─────────────────────
330    RiskLevel::Moderate
331}
332
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// A path under `C:\Windows` must be rejected by the textual blacklist check,
    /// before any filesystem access happens.
    #[test]
    fn test_blacklist_windows_system() {
        let root = Path::new("C:\\Users\\ocean\\Project");
        let target = Path::new("C:\\Windows\\System32\\cmd.exe");
        let result = path_is_safe(root, target);
        assert!(
            result.is_err(),
            "Windows System directory should be blocked!"
        );
        assert!(result.unwrap_err().contains("Security Blacklist"));
    }

    /// A relative `..` resolves to the parent of the workspace and is refused.
    #[test]
    fn test_relative_parent_traversal_is_blocked() {
        let root = std::env::current_dir().unwrap();
        let result = path_is_safe(&root, Path::new(".."));
        assert!(
            result.is_err(),
            "Relative traversal outside of workspace root should be blocked!"
        );
        assert!(result.unwrap_err().contains("SANDBOX BREACHED"));
    }

    /// The same parent directory given as an absolute path is accepted by the
    /// relaxed sandbox policy (skipped when the current directory has no parent).
    #[test]
    fn test_absolute_outside_path_is_allowed_when_not_blacklisted() {
        let root = std::env::current_dir().unwrap();
        if let Some(parent) = root.parent() {
            let result = path_is_safe(&root, parent);
            assert!(
                result.is_ok(),
                "Absolute non-blacklisted paths should follow the relaxed sandbox policy."
            );
        }
    }

    /// Shell commands that mention a blacklisted location are refused.
    #[test]
    fn test_bash_blacklist() {
        let cmd = "ls C:\\Windows";
        let result = bash_is_safe(cmd);
        assert!(
            result.is_err(),
            "Bash command touching Windows should be blocked!"
        );
        assert!(result.unwrap_err().contains("blacklisted system area"));
    }

    /// One representative command per risk tier.
    #[test]
    fn test_risk_classification() {
        assert_eq!(classify_bash_risk("cargo check"), RiskLevel::Safe);
        assert_eq!(classify_bash_risk("rm -rf /"), RiskLevel::High);
        // `mkdir ` is on the scaffold allow-list, so directory creation is Safe.
        assert_eq!(classify_bash_risk("mkdir new_dir"), RiskLevel::Safe);
        // Neither high-risk nor allow-listed: falls through to Moderate.
        assert_eq!(classify_bash_risk("npm install"), RiskLevel::Moderate);
        assert_eq!(
            classify_bash_risk("get-counter '\\PhysicalDisk(_Total)\\Avg. Disk Queue Length'"),
            RiskLevel::Safe
        );
        assert_eq!(classify_bash_risk("powershell -command \"get-counter '\\PhysicalDisk(_Total)\\Avg. Disk Queue Length'\""), RiskLevel::Safe);
    }
}