hematite/tools/
guard.rs

1use super::tool::RiskLevel;
2use std::path::{Path, PathBuf};
3
/// Blacklist of path fragments that file and shell tools must never touch.
///
/// `path_is_safe` and `bash_is_safe` lowercase each entry, rewrite `\` to `/`, and then test
/// whether the normalized path or command text *contains* the entry as a substring. Entries are
/// therefore fragments rather than exact paths: directory entries cover everything beneath
/// them, and bare file names match wherever they appear in the text.
///
/// The slice is scanned in declaration order and the first matching entry is the one quoted in
/// the resulting `AccessDenied` message, so reordering entries changes which entry is reported.
// NOTE(review): the constant is `pub` and read by both guards in this file; confirm whether the
// `dead_code` allowance below is still required.
#[allow(dead_code)]
pub const PROTECTED_FILES: &[&str] = &[
    // Windows system locations (backslashes are normalized to `/` before matching)
    "C:\\Windows",
    "C:\\Program Files",
    "C:\\$Recycle.Bin",
    "System Volume Information",
    "C:\\Users\\Default",
    // Linux/Unix system directories
    "/etc",
    "/dev",
    "/proc",
    "/sys",
    "/root",
    "/var/log",
    "/boot",
    // Per-user sensitive files: shell profiles and history, VCS config, keys and credentials
    ".bashrc",
    ".zshrc",
    ".bash_history",
    ".gitconfig",
    ".ssh/",
    ".aws/",
    ".env",
    "credentials.json",
    "auth.json",
    "id_rsa",
    // Hematite's own configuration and state files
    ".mcp.json",
    "hematite_memory.db",
];
35
36/// Enforces the absolute Canonical Traversal lock on the LLM, rendering directory climbing (`../`) obsolete
37/// and blocking any OS-critical reads aggressively by cross-referencing global blacklists.
38#[allow(dead_code)]
39pub fn path_is_safe(workspace_root: &Path, target: &Path) -> Result<PathBuf, String> {
40    // 1) Evaluate target string explicitly normalizing unicode and backslash injection vectors!
41    let mut target_str = target.to_string_lossy().to_string().to_lowercase();
42    target_str = target_str
43        .replace("\\", "/")
44        .replace("\u{005c}", "/")
45        .replace("%5c", "/");
46
47    // Early evaluation covering read-only "Ghosting" on target secrets explicitly
48    for protected in PROTECTED_FILES {
49        let prot_lower = protected.to_lowercase().replace("\\", "/");
50        if target_str.contains(&prot_lower) {
51            return Err(format!(
52                "AccessDenied: Path {} hits the Hematite Security Blacklist natively: {}",
53                target_str, protected
54            ));
55        }
56    }
57
58    // 2) Native Canonicalization - Forcing OS Reality Context over LLM hallucinations
59    let resolved_path = match std::fs::canonicalize(target) {
60        Ok(p) => p,
61        Err(_) => {
62            // If creating a brand new isolated file, physically trace the parent node
63            let parent = target.parent().unwrap_or(Path::new(""));
64            let mut resolved_parent = std::fs::canonicalize(parent)
65                .map_err(|_| "AccessDenied: Invalid directory ancestry inside sandbox root. Path traversing halted!".to_string())?;
66            if let Some(name) = target.file_name() {
67                resolved_parent.push(name);
68            }
69            resolved_parent
70        }
71    };
72
73    // Hard check against hallucinated drive letters that resolved cleanly across symlinks natively
74    let resolved_str = resolved_path
75        .to_string_lossy()
76        .to_string()
77        .to_lowercase()
78        .replace("\\", "/");
79    for protected in PROTECTED_FILES {
80        let prot_lower = protected.to_lowercase().replace("\\", "/");
81        if resolved_str.contains(&prot_lower) {
82            return Err(format!(
83                "AccessDenied: Canonicalized Sandbox resolution natively hits Blacklist bounds: {}",
84                protected
85            ));
86        }
87    }
88
89    let resolved_workspace = std::fs::canonicalize(workspace_root).unwrap_or_default();
90
91    // 3) Assess Physical Traversal Limits strictly against the Root Environment Prefix
92    if !resolved_path.starts_with(&resolved_workspace) {
93        // RELAXED SANDBOX: Allow absolute paths IF they passed the blacklist checks above.
94        if target.is_absolute() {
95            return Ok(resolved_path);
96        }
97        return Err(format!("AccessDenied: ⛔ SANDBOX BREACHED ⛔ Attempted directory traversal outside project bounds: {:?}", resolved_path));
98    }
99
100    Ok(resolved_path)
101}
102
103/// Hard-blocks Bash payloads unconditionally if they attempt to reference OS-critical locations
104#[allow(dead_code)]
105pub fn bash_is_safe(cmd: &str) -> Result<(), String> {
106    let lower = cmd
107        .to_lowercase()
108        .replace("\\", "/")
109        .replace("\u{005c}", "/")
110        .replace("%5c", "/");
111    for protected in PROTECTED_FILES {
112        let prot_lower = protected.to_lowercase().replace("\\", "/");
113        if lower.contains(&prot_lower) {
114            return Err(format!("AccessDenied: Bash command structurally attempts to manipulate blacklisted system area: {}", protected));
115        }
116    }
117
118    // Block using shell as a substitute for run_code.
119    // The model should use run_code directly — shell is the wrong tool for this.
120    let sandbox_redirects = [
121        "deno run",
122        "deno --version",
123        "deno -v",
124        "python -c ",
125        "python3 -c ",
126        "node -e ",
127        "node --eval",
128    ];
129    for pattern in sandbox_redirects {
130        if lower.contains(pattern) {
131            return Err(format!(
132                "Use the run_code tool instead of shell for executing {} code. \
133                 Shell is blocked for sandbox-style execution.",
134                pattern.split_whitespace().next().unwrap_or("code")
135            ));
136        }
137    }
138
139    Ok(())
140}
141
142/// Three-tier risk classifier for shell commands.
143///
144/// Safe   → auto-approved (read-only, build, test, local git reads)
145/// High   → always requires user approval (destructive, network, privilege)
146/// Moderate → ask by default; can be configured to auto-approve
147pub fn classify_bash_risk(cmd: &str) -> RiskLevel {
148    let lower = cmd.to_lowercase();
149
150    // ── HIGH: destructive / network / privilege ────────────────────────────
151    let high = [
152        // File destruction
153        "rm -",
154        "rm /",
155        "del /",
156        "del /f",
157        "rmdir /s",
158        "remove-item -r",
159        // Network exfiltration
160        "curl ",
161        "wget ",
162        "invoke-webrequest",
163        "invoke-restmethod",
164        "fetch ",
165        // Privilege escalation
166        "sudo ",
167        "runas ",
168        "su -",
169        // Git remote writes
170        "git push",
171        "git force",
172        "git reset --hard",
173        "git clean -f",
174        // System
175        "shutdown",
176        "restart-computer",
177        "taskkill",
178        "format-volume",
179        "diskpart",
180        "format c",
181        "del c:\\",
182        // Secrets
183        ".ssh/",
184        ".aws/",
185        "credentials.json",
186    ];
187    if high.iter().any(|p| lower.contains(p)) {
188        return RiskLevel::High;
189    }
190
191    // ── SAFE: read-only, build, test, local git reads ──────────────────────
192    let safe_prefixes = [
193        "cargo check",
194        "cargo build",
195        "cargo test",
196        "cargo fmt",
197        "cargo clippy",
198        "cargo run",
199        "cargo doc",
200        "cargo tree",
201        "rustc ",
202        "rustfmt ",
203        "git status",
204        "git log",
205        "git diff",
206        "git branch",
207        "git show",
208        "git stash list",
209        "git remote -v",
210        "ls ",
211        "ls\n",
212        "dir ",
213        "dir\n",
214        "echo ",
215        "pwd",
216        "whoami",
217        "cat ",
218        "type ",
219        "head ",
220        "tail ",
221        "get-childitem",
222        "get-content",
223        "get-location",
224        "cargo --version",
225        "rustc --version",
226        "git --version",
227        "node --version",
228        "npm --version",
229        "python --version",
230        // Read-only search and inspection — must never require approval
231        "grep ",
232        "grep\n",
233        "rg ",
234        "rg\n",
235        "find ",
236        "find\n",
237        "select-string",
238        "select-object",
239        "where-object",
240        "sort ",
241        "sort\n",
242        "wc ",
243        "uniq ",
244        "cut ",
245        "file ",
246        "stat ",
247        "du ",
248        "df ",
249        // PowerShell wrapped read-only commands (Select-String, Get-ChildItem inside powershell -Command)
250        "powershell -command \"select-string",
251        "powershell -command \"get-childitem",
252        "powershell -command \"get-content",
253        "powershell -command \"get-counter",
254        "powershell -command 'select-string",
255        "powershell -command 'get-childitem",
256        "powershell -command 'get-counter",
257        "get-counter",
258        "get-item",
259        "test-path",
260        "select-object",
261        "powershell -command \"get-item",
262        "powershell -command \"test-path",
263        "powershell -command \"select-object",
264        "powershell -command 'get-item",
265        "powershell -command 'test-path",
266        "powershell -command 'select-object",
267        "get-smbencryptionstatus",
268        "get-smbshare",
269        "get-smbsession",
270        "get-netlanmanagerconnection",
271    ];
272    if safe_prefixes
273        .iter()
274        .any(|p| lower.starts_with(p) || lower == p.trim())
275    {
276        return RiskLevel::Safe;
277    }
278
279    // ── MODERATE: mutation ops that don't destroy data ─────────────────────
280    RiskLevel::Moderate
281}
282
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// A target under `C:\Windows` is refused by the textual blacklist screen.
    #[test]
    fn test_blacklist_windows_system() {
        let workspace = Path::new("C:\\Users\\ocean\\Project");
        let system_binary = Path::new("C:\\Windows\\System32\\cmd.exe");

        let message = path_is_safe(workspace, system_binary)
            .expect_err("Windows System directory should be blocked!");

        assert!(message.contains("Security Blacklist"));
    }

    /// A relative `..` climbs out of the workspace and must trip the sandbox check.
    #[test]
    fn test_relative_parent_traversal_is_blocked() {
        let workspace = std::env::current_dir().unwrap();

        let message = path_is_safe(&workspace, Path::new(".."))
            .expect_err("Relative traversal outside of workspace root should be blocked!");

        assert!(message.contains("SANDBOX BREACHED"));
    }

    /// An absolute path outside the workspace is accepted when nothing on the blacklist matches.
    #[test]
    fn test_absolute_outside_path_is_allowed_when_not_blacklisted() {
        let workspace = std::env::current_dir().unwrap();
        // Nothing to check when the working directory has no parent.
        let outside = match workspace.parent() {
            Some(dir) => dir,
            None => return,
        };

        assert!(
            path_is_safe(&workspace, outside).is_ok(),
            "Absolute non-blacklisted paths should follow the relaxed sandbox policy."
        );
    }

    /// A shell command that names a blacklisted directory is refused.
    #[test]
    fn test_bash_blacklist() {
        let message = bash_is_safe("ls C:\\Windows")
            .expect_err("Bash command touching Windows should be blocked!");

        assert!(message.contains("blacklisted system area"));
    }

    /// One representative command per tier, plus bare and PowerShell-wrapped `get-counter`.
    #[test]
    fn test_risk_classification() {
        let cases = [
            ("cargo check", RiskLevel::Safe),
            ("rm -rf /", RiskLevel::High),
            ("mkdir new_dir", RiskLevel::Moderate),
            (
                "get-counter '\\PhysicalDisk(_Total)\\Avg. Disk Queue Length'",
                RiskLevel::Safe,
            ),
            (
                "powershell -command \"get-counter '\\PhysicalDisk(_Total)\\Avg. Disk Queue Length'\"",
                RiskLevel::Safe,
            ),
        ];

        for (command, expected) in &cases {
            assert_eq!(classify_bash_risk(command), *expected, "command: {}", command);
        }
    }
}
hematite/tools/guard.rs

hematite/tools/
guard.rs