// hematite/tools/risk_evaluator.rs
1use super::tool::RiskLevel;
2
/// Local risk evaluator: a fast, deterministic triage pass that assigns a
/// risk level to each action a Swarm worker wants to perform.
///
/// - Low-risk actions are auto-approved silently.
/// - Moderate-risk actions log a warning.
/// - High-risk actions trigger the Red Modal.
///
/// The goal is to remove the UX bottleneck of prompting the user for every
/// single file read, while still enforcing hard safety gates in front of
/// destructive operations.
///
/// The type carries no state; all functionality is exposed as associated
/// functions.
#[allow(dead_code)]
pub struct RiskEvaluator;
11
12#[allow(dead_code)]
13impl RiskEvaluator {
14 /// Classifies a tool invocation based on its name and arguments.
15 /// Returns the RiskLevel which determines whether the action
16 /// auto-approves, warns, or blocks behind the Red Modal.
17 pub fn classify(tool_name: &str, args_preview: &str) -> RiskLevel {
18 // HIGH RISK: Anything that can destroy data, push to remotes,
19 // or modify system-level configuration. Always blocks.
20 let high_risk_patterns = [
21 "rm ",
22 "rm -",
23 "del ",
24 "rmdir",
25 "git push",
26 "git reset --hard",
27 "format-volume",
28 "diskpart",
29 "system32",
30 "C:\\Windows",
31 "shutdown",
32 "taskkill",
33 ".env",
34 ".ssh",
35 ".gitconfig",
36 "curl ",
37 "wget ",
38 "Invoke-WebRequest",
39 "chmod 777",
40 "sudo ",
41 ];
42
43 for pattern in &high_risk_patterns {
44 if args_preview.contains(pattern) {
45 return RiskLevel::High;
46 }
47 }
48
49 // HIGH RISK: Tool-level classification for inherently destructive tools
50 match tool_name {
51 "BashTool" | "PowerShellTool" => {
52 // Bash is MODERATE by default — it could do anything.
53 // But specific args above escalate it to HIGH.
54 return RiskLevel::Moderate;
55 }
56 "FileWriteTool" | "FileEditTool" => {
57 return RiskLevel::Moderate;
58 }
59 "git_commit" => {
60 return RiskLevel::High;
61 }
62 _ => {}
63 }
64
65 // SAFE: Read-only operations that can never modify state.
66 let safe_tools = [
67 "FileReadTool",
68 "GlobTool",
69 "GrepTool",
70 "SlimLspTool",
71 "ToolSearchTool",
72 ];
73
74 if safe_tools.contains(&tool_name) {
75 return RiskLevel::Safe;
76 }
77
78 // Default: anything unknown is MODERATE — log but don't block
79 RiskLevel::Moderate
80 }
81
82 /// Returns true if the action can be auto-approved without user input.
83 pub fn can_auto_approve(level: RiskLevel, yolo_mode: bool) -> bool {
84 match level {
85 RiskLevel::Safe => true,
86 RiskLevel::Moderate => yolo_mode, // Only auto-approve in YOLO mode
87 RiskLevel::High => false, // NEVER auto-approve, even in YOLO
88 }
89 }
90}