// split_brain_harness/policy.rs

//! Static policy layer — checks a `CapabilityRequest` against hard rules before
//! the supervisor allows any execution. All checks are deterministic and cheap.
//!
//! Rules enforced here:
//! - network access is forbidden (`no_network` must be true)
//! - filesystem writes are forbidden (`read_only_input` must be true)
//! - resource limits must not exceed supervisor-side maximums
//! - the request must pass its own `validate()` check
9use crate::capability::{Budget, CapabilityRequest, PolicyViolation, ToolMetrics};
10
/// Supervisor-side ceiling on the per-request runtime, in milliseconds
/// (ten seconds). Fixed at compile time; `check_request` reports a violation
/// for any request whose `max_runtime_ms` is strictly greater than this.
const MAX_RUNTIME_MS: u64 = 10 * 1_000;
/// Supervisor-side ceiling on the per-request memory allowance, in megabytes.
/// Fixed at compile time; `check_request` reports a violation for any request
/// whose `max_memory_mb` is strictly greater than this.
const MAX_MEMORY_MB: u64 = 256;
14
15/// Per-session accounting state tracked alongside the Budget.
16#[derive(Debug, Default)]
17pub struct PolicyState {
18    pub tools_invoked: usize,
19    pub consecutive_failures: usize,
20    pub total_runtime_ms: u64,
21}
22
23impl PolicyState {
24    /// Update state after a completed run.
25    pub fn record_run(&mut self, metrics: &ToolMetrics) {
26        self.tools_invoked += 1;
27        self.total_runtime_ms += metrics.runtime_ms;
28        if metrics.success {
29            self.consecutive_failures = 0;
30        } else {
31            self.consecutive_failures += 1;
32        }
33    }
34
35    /// Returns Some(reason) if any budget limit is already exhausted.
36    pub fn budget_exceeded(&self, budget: &Budget) -> Option<String> {
37        if self.tools_invoked >= budget.max_tools_per_session {
38            return Some(format!(
39                "session tool limit reached ({}/{})",
40                self.tools_invoked, budget.max_tools_per_session
41            ));
42        }
43        if self.total_runtime_ms >= budget.max_total_runtime_ms {
44            return Some(format!(
45                "session runtime budget exhausted ({}ms/{}ms)",
46                self.total_runtime_ms, budget.max_total_runtime_ms
47            ));
48        }
49        if self.consecutive_failures >= budget.require_approval_after_failures {
50            return Some(format!(
51                "{} consecutive failures — user approval required before continuing",
52                self.consecutive_failures
53            ));
54        }
55        None
56    }
57}
58
59/// Run all static policy checks against a request. Returns a list of violations;
60/// an empty list means the request is clean.
61pub fn check_request(req: &CapabilityRequest) -> Vec<PolicyViolation> {
62    let mut violations: Vec<PolicyViolation> = vec![];
63
64    // Structural validity
65    if let Err(e) = req.validate() {
66        violations.push(PolicyViolation {
67            rule: "structural_validity".into(),
68            detail: e,
69        });
70    }
71
72    // Network access
73    if !req.constraints.no_network {
74        violations.push(PolicyViolation {
75            rule: "no_network".into(),
76            detail: "capability_request.constraints.no_network must be true".into(),
77        });
78    }
79
80    // Read-only input
81    if !req.constraints.read_only_input {
82        violations.push(PolicyViolation {
83            rule: "read_only_input".into(),
84            detail: "capability_request.constraints.read_only_input must be true".into(),
85        });
86    }
87
88    // Runtime ceiling
89    if req.constraints.max_runtime_ms > MAX_RUNTIME_MS {
90        violations.push(PolicyViolation {
91            rule: "max_runtime_ms".into(),
92            detail: format!(
93                "requested {}ms exceeds supervisor ceiling of {}ms",
94                req.constraints.max_runtime_ms, MAX_RUNTIME_MS
95            ),
96        });
97    }
98
99    // Memory ceiling
100    if req.constraints.max_memory_mb > MAX_MEMORY_MB {
101        violations.push(PolicyViolation {
102            rule: "max_memory_mb".into(),
103            detail: format!(
104                "requested {}MB exceeds supervisor ceiling of {}MB",
105                req.constraints.max_memory_mb, MAX_MEMORY_MB
106            ),
107        });
108    }
109
110    violations
111}
112
113// ---------------------------------------------------------------------------
114// Tests
115// ---------------------------------------------------------------------------
116
#[cfg(test)]
mod tests {
    use super::*;
    use crate::capability::CapabilityConstraints;

    /// Baseline request built on `CapabilityConstraints::default()`; the
    /// `clean_request_has_no_violations` test asserts that it passes every
    /// static check.
    fn clean_request() -> CapabilityRequest {
        CapabilityRequest {
            kind: "capability_request".into(),
            capability: "test_cap".into(),
            input_contract: "utf8 text".into(),
            output_contract: "json".into(),
            constraints: CapabilityConstraints::default(),
            reason: "text reasoning insufficient".into(),
        }
    }

    /// Start from the baseline request and apply one targeted mutation.
    fn request_with(adjust: impl FnOnce(&mut CapabilityRequest)) -> CapabilityRequest {
        let mut req = clean_request();
        adjust(&mut req);
        req
    }

    /// True when `check_request` reports at least one violation of `rule`.
    fn violates(req: &CapabilityRequest, rule: &'static str) -> bool {
        check_request(req)
            .iter()
            .any(|violation| violation.rule == rule)
    }

    // --- check_request ------------------------------------------------------

    #[test]
    fn clean_request_has_no_violations() {
        let violations = check_request(&clean_request());
        assert!(violations.is_empty());
    }

    #[test]
    fn network_access_rejected() {
        let req = request_with(|r| r.constraints.no_network = false);
        assert!(violates(&req, "no_network"));
    }

    #[test]
    fn non_readonly_rejected() {
        let req = request_with(|r| r.constraints.read_only_input = false);
        assert!(violates(&req, "read_only_input"));
    }

    #[test]
    fn excessive_runtime_rejected() {
        let req = request_with(|r| r.constraints.max_runtime_ms = 99_999);
        assert!(violates(&req, "max_runtime_ms"));
    }

    #[test]
    fn excessive_memory_rejected() {
        let req = request_with(|r| r.constraints.max_memory_mb = 512);
        assert!(violates(&req, "max_memory_mb"));
    }

    #[test]
    fn invalid_kind_rejected() {
        let req = request_with(|r| r.kind = "wrong".into());
        assert!(violates(&req, "structural_validity"));
    }

    // --- PolicyState::budget_exceeded ---------------------------------------

    #[test]
    fn budget_exceeded_on_tool_limit() {
        let limit = Budget {
            max_tools_per_session: 2,
            ..Default::default()
        };
        let session = PolicyState {
            tools_invoked: 2,
            ..Default::default()
        };
        assert!(session.budget_exceeded(&limit).is_some());
    }

    #[test]
    fn budget_ok_under_limit() {
        let verdict = PolicyState::default().budget_exceeded(&Budget::default());
        assert!(verdict.is_none());
    }

    #[test]
    fn consecutive_failures_trigger_approval() {
        let limit = Budget {
            require_approval_after_failures: 2,
            ..Default::default()
        };
        let session = PolicyState {
            consecutive_failures: 2,
            ..Default::default()
        };
        let message = session
            .budget_exceeded(&limit)
            .expect("two failures against a limit of two should be reported");
        assert!(message.contains("approval"));
    }

    // --- PolicyState::record_run --------------------------------------------

    #[test]
    fn success_resets_consecutive_failures() {
        let mut session = PolicyState {
            consecutive_failures: 3,
            ..PolicyState::default()
        };
        let good_run = ToolMetrics {
            success: true,
            runtime_ms: 10,
            ..Default::default()
        };
        session.record_run(&good_run);
        assert_eq!(session.consecutive_failures, 0);
    }

    #[test]
    fn failure_increments_consecutive_failures() {
        let mut session = PolicyState::default();
        let bad_run = ToolMetrics {
            success: false,
            runtime_ms: 5,
            ..Default::default()
        };
        session.record_run(&bad_run);
        assert_eq!(session.consecutive_failures, 1);
    }
}