Skip to main content

sparrow/autonomy/
mod.rs

1use serde::{Deserialize, Serialize};
2
3use crate::event::{CheckpointId, Decision, RiskLevel};
4
5pub use crate::event::AutonomyLevel;
6
7// ─── Autonomy contract ──────────────────────────────────────────────────────────
8
/// Continuous trust contract attached to every run.
///
/// Bundles the autonomy level, the per-risk approval policy, the resource
/// budget and the hard stops that `AutonomyContract::evaluate` consults when
/// judging a `ProposedAction`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutonomyContract {
    /// Autonomy level of the run; `evaluate` uses it for the verdict reason
    /// and to decide whether an allowed action should raise a notification.
    pub level: AutonomyLevel,
    /// Decision to take for each risk level when no hard stop applies.
    pub approve: ApprovalPolicy,
    /// Resource limits for the run. Nothing in this module reads it;
    /// presumably enforced by the caller — TODO confirm.
    pub budget: Budget,
    /// Hard stops checked before the approval policy. Only
    /// `HardStop::RiskLevel` entries are acted on by `evaluate`.
    pub stops: Vec<HardStop>,
}
17
18impl AutonomyContract {
19    pub fn supervised() -> Self {
20        Self {
21            level: AutonomyLevel::Supervised,
22            approve: ApprovalPolicy::default_supervised(),
23            budget: Budget::default(),
24            stops: vec![
25                HardStop::RiskLevel(RiskLevel::Destructive),
26                HardStop::BudgetExceeded,
27            ],
28        }
29    }
30
31    pub fn trusted() -> Self {
32        Self {
33            level: AutonomyLevel::Trusted,
34            approve: ApprovalPolicy::default_trusted(),
35            budget: Budget::default(),
36            stops: vec![HardStop::RiskLevel(RiskLevel::Destructive)],
37        }
38    }
39
40    pub fn autonomous() -> Self {
41        Self {
42            level: AutonomyLevel::Autonomous,
43            approve: ApprovalPolicy::default_autonomous(),
44            budget: Budget::default(),
45            stops: vec![],
46        }
47    }
48
49    pub fn decide(&self, action: &ProposedAction) -> Decision {
50        self.evaluate(action).decision
51    }
52
53    pub fn evaluate(&self, action: &ProposedAction) -> AutonomyVerdict {
54        // Check hard stops first
55        for stop in &self.stops {
56            match stop {
57                HardStop::RiskLevel(rl) if action.risk == *rl => {
58                    return AutonomyVerdict::new(
59                        Decision::Deny,
60                        false,
61                        false,
62                        format!("hard stop blocks {:?} actions", action.risk),
63                    );
64                }
65                _ => {}
66            }
67        }
68        let decision = self.approve.decide(action);
69        let needs_checkpoint = matches!(
70            action.risk,
71            RiskLevel::Mutating | RiskLevel::Exec | RiskLevel::Destructive
72        ) && matches!(decision, Decision::Allow | Decision::AskUser);
73        let notify = matches!(self.level, AutonomyLevel::Trusted)
74            && matches!(decision, Decision::Allow)
75            && matches!(action.risk, RiskLevel::Mutating | RiskLevel::Exec);
76        AutonomyVerdict::new(
77            decision,
78            needs_checkpoint,
79            notify,
80            format!("{:?} policy for tool '{}'", self.level, action.tool_name),
81        )
82    }
83}
84
/// Outcome of evaluating a `ProposedAction` against an `AutonomyContract`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AutonomyVerdict {
    /// What to do with the action.
    pub decision: Decision,
    /// Set when the action is mutating, exec or destructive and the decision
    /// is `Allow` or `AskUser`.
    pub needs_checkpoint: bool,
    /// Set when a mutating or exec action is allowed at the `Trusted` level.
    pub notify: bool,
    /// Human-readable explanation of how the decision was reached.
    pub reason: String,
}
92
93impl AutonomyVerdict {
94    fn new(
95        decision: Decision,
96        needs_checkpoint: bool,
97        notify: bool,
98        reason: impl Into<String>,
99    ) -> Self {
100        Self {
101            decision,
102            needs_checkpoint,
103            notify,
104            reason: reason.into(),
105        }
106    }
107}
108
109// ─── Approval policy ────────────────────────────────────────────────────────────
110
/// Per-risk-level approval table: one `Decision` for each `RiskLevel`
/// variant, looked up by `ApprovalPolicy::decide`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApprovalPolicy {
    /// Decision for `RiskLevel::ReadOnly` actions.
    pub read_only: Decision,
    /// Decision for `RiskLevel::Mutating` actions.
    pub mutating: Decision,
    /// Decision for `RiskLevel::Exec` actions.
    pub exec: Decision,
    /// Decision for `RiskLevel::Destructive` actions.
    pub destructive: Decision,
    /// Decision for `RiskLevel::Network` actions.
    pub network: Decision,
}
119
120impl ApprovalPolicy {
121    pub fn default_supervised() -> Self {
122        Self {
123            read_only: Decision::Allow,
124            mutating: Decision::AskUser,
125            exec: Decision::AskUser,
126            destructive: Decision::Deny,
127            network: Decision::AskUser,
128        }
129    }
130
131    pub fn default_trusted() -> Self {
132        Self {
133            read_only: Decision::Allow,
134            mutating: Decision::Allow,
135            exec: Decision::Allow,
136            destructive: Decision::AskUser,
137            network: Decision::Allow,
138        }
139    }
140
141    pub fn default_autonomous() -> Self {
142        Self {
143            read_only: Decision::Allow,
144            mutating: Decision::Allow,
145            exec: Decision::Allow,
146            destructive: Decision::AskUser,
147            network: Decision::Allow,
148        }
149    }
150
151    pub fn decide(&self, action: &ProposedAction) -> Decision {
152        match action.risk {
153            RiskLevel::ReadOnly => self.read_only.clone(),
154            RiskLevel::Mutating => self.mutating.clone(),
155            RiskLevel::Exec => self.exec.clone(),
156            RiskLevel::Destructive => self.destructive.clone(),
157            RiskLevel::Network => self.network.clone(),
158        }
159    }
160}
161
162// ─── Proposed action ────────────────────────────────────────────────────────────
163
/// A tool invocation awaiting an autonomy decision.
#[derive(Debug, Clone)]
pub struct ProposedAction {
    /// Name of the tool to invoke; quoted in verdict reasons.
    pub tool_name: String,
    /// Risk classification that drives hard stops and the approval policy.
    pub risk: RiskLevel,
    /// Tool arguments as JSON. Not inspected by any code in this module.
    pub args: serde_json::Value,
}
170
171// ─── Budget ─────────────────────────────────────────────────────────────────────
172
/// Resource limits carried by an `AutonomyContract`.
///
/// This module only stores the values; nothing here compares usage against
/// them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Budget {
    /// Spending cap (presumably US dollars, going by the field name).
    pub max_usd: f64,
    /// Token cap.
    pub max_tokens: u64,
    /// Wall-clock cap in seconds.
    pub max_wallclock_secs: u64,
}
179
180impl Default for Budget {
181    fn default() -> Self {
182        Self {
183            max_usd: 5.0,
184            max_tokens: 100_000,
185            max_wallclock_secs: 3600,
186        }
187    }
188}
189
190// ─── Hard stops ─────────────────────────────────────────────────────────────────
191
/// Conditions listed in `AutonomyContract::stops`.
///
/// Within this module only `RiskLevel` is acted on (by
/// `AutonomyContract::evaluate`); the other variants are not examined here
/// and are presumably enforced elsewhere — TODO confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum HardStop {
    /// Deny every action whose risk equals the wrapped level.
    RiskLevel(RiskLevel),
    /// Marker for a budget overrun.
    BudgetExceeded,
    /// Marker for a sandbox escape.
    SandboxEscape,
    /// Marker for repeated tool failures.
    RepeatedToolFailure,
}
199
200// ─── THE GATE TRAIT ─────────────────────────────────────────────────────────────
201
/// Anything that can turn a proposed action into a `Decision`.
///
/// Implementors must be `Send + Sync`.
pub trait Gate: Send + Sync {
    /// Returns the decision for `action`.
    fn decide(&self, action: &ProposedAction) -> Decision;
}
205
206impl Gate for AutonomyContract {
207    fn decide(&self, action: &ProposedAction) -> Decision {
208        self.decide(action)
209    }
210}
211
212// ─── Checkpoints ────────────────────────────────────────────────────────────────
213
/// Descriptor of a stored workspace checkpoint, as returned by
/// `Checkpoints::list`.
#[derive(Debug, Clone)]
pub struct Checkpoint {
    /// Identifier used to rewind to, or diff against, this checkpoint.
    pub id: CheckpointId,
    /// Human-readable label.
    pub label: String,
    /// UTC time associated with the checkpoint.
    pub timestamp: chrono::DateTime<chrono::Utc>,
}
220
/// Snapshot and rewind workspace state.
///
/// Implementors must be `Send + Sync`.
pub trait Checkpoints: Send + Sync {
    /// Record the current workspace state under `label` and return the new
    /// checkpoint's id.
    fn snapshot(&self, label: &str) -> anyhow::Result<CheckpointId>;
    /// Return the known checkpoints. Infallible by signature, so
    /// implementations have to swallow lookup errors.
    fn list(&self) -> Vec<Checkpoint>;
    /// Restore the workspace to the state recorded by checkpoint `to`.
    fn rewind(&self, to: CheckpointId) -> anyhow::Result<()>;
    /// Show unified diff between HEAD and a checkpoint ref.
    fn diff(&self, id: &CheckpointId) -> anyhow::Result<String>;
    /// Delete checkpoint refs older than `older_than_days` days.
    /// Returns the number of refs deleted.
    fn prune(&self, older_than_days: u64) -> anyhow::Result<usize>;
}
232
/// Git-backed checkpoint implementation (basic, M0).
///
/// Shells out to the `git` executable and keeps checkpoints as refs under
/// `refs/sparrow/checkpoints/`.
pub struct GitCheckpoints {
    /// Directory every `git` command is run in.
    repo_path: std::path::PathBuf,
}
237
238impl GitCheckpoints {
239    pub fn new(repo_path: std::path::PathBuf) -> Self {
240        Self { repo_path }
241    }
242}
243
244impl Checkpoints for GitCheckpoints {
245    fn snapshot(&self, label: &str) -> anyhow::Result<CheckpointId> {
246        let id = CheckpointId::new();
247        use std::process::Command;
248
249        let in_repo = Command::new("git")
250            .args(["rev-parse", "--is-inside-work-tree"])
251            .current_dir(&self.repo_path)
252            .output()?;
253
254        if !in_repo.status.success() {
255            anyhow::bail!("Not a git repository: {}", self.repo_path.display());
256        }
257
258        let stash = Command::new("git")
259            .args(["stash", "create", &format!("SPARROW-CHECKPOINT: {}", label)])
260            .current_dir(&self.repo_path)
261            .output()?;
262
263        let mut sha = String::from_utf8_lossy(&stash.stdout).trim().to_string();
264        if sha.is_empty() {
265            let head = Command::new("git")
266                .args(["rev-parse", "HEAD"])
267                .current_dir(&self.repo_path)
268                .output()?;
269            if !head.status.success() {
270                anyhow::bail!("Cannot create checkpoint without HEAD");
271            }
272            sha = String::from_utf8_lossy(&head.stdout).trim().to_string();
273        }
274
275        let ref_name = format!("refs/sparrow/checkpoints/{}", id.0);
276        let status = Command::new("git")
277            .args(["update-ref", &ref_name, &sha])
278            .current_dir(&self.repo_path)
279            .status()?;
280
281        if !status.success() {
282            anyhow::bail!("Failed to save checkpoint ref {}", ref_name);
283        }
284
285        Ok(id)
286    }
287
288    fn list(&self) -> Vec<Checkpoint> {
289        use std::process::Command;
290        let output = Command::new("git")
291            .args([
292                "for-each-ref",
293                "refs/sparrow/checkpoints",
294                "--format=%(refname:short) %(objectname:short) %(creatordate:iso)",
295            ])
296            .current_dir(&self.repo_path)
297            .output()
298            .ok();
299
300        let mut checkpoints = Vec::new();
301        if let Some(output) = output {
302            let text = String::from_utf8_lossy(&output.stdout);
303            for line in text.lines() {
304                let parts: Vec<&str> = line.split_whitespace().collect();
305                if let Some(name) = parts.first() {
306                    let id = name.rsplit('/').next().unwrap_or(name).to_string();
307                    checkpoints.push(Checkpoint {
308                        id: CheckpointId(id.clone()),
309                        label: format!("checkpoint {}", id),
310                        timestamp: chrono::Utc::now(),
311                    });
312                }
313            }
314        }
315        checkpoints
316    }
317
318    fn rewind(&self, to: CheckpointId) -> anyhow::Result<()> {
319        use std::process::Command;
320        let ref_name = format!("refs/sparrow/checkpoints/{}", to.0);
321        let status = Command::new("git")
322            .args(["reset", "--hard", &ref_name])
323            .current_dir(&self.repo_path)
324            .status()?;
325
326        if !status.success() {
327            anyhow::bail!("Failed to rewind to checkpoint {}", to.0);
328        }
329        Ok(())
330    }
331
332    fn diff(&self, id: &CheckpointId) -> anyhow::Result<String> {
333        use std::process::Command;
334        let ref_name = format!("refs/sparrow/checkpoints/{}", id.0);
335        let output = Command::new("git")
336            .args(["diff", &ref_name, "HEAD"])
337            .current_dir(&self.repo_path)
338            .output()?;
339        if !output.status.success() {
340            let err = String::from_utf8_lossy(&output.stderr).trim().to_string();
341            anyhow::bail!("git diff failed for checkpoint {}: {}", id.0, err);
342        }
343        Ok(String::from_utf8_lossy(&output.stdout).to_string())
344    }
345
346    fn prune(&self, older_than_days: u64) -> anyhow::Result<usize> {
347        use std::process::Command;
348        // List all sparrow checkpoint refs with their creator dates
349        let output = Command::new("git")
350            .args([
351                "for-each-ref",
352                "refs/sparrow/checkpoints",
353                "--format=%(refname) %(creatordate:unix)",
354            ])
355            .current_dir(&self.repo_path)
356            .output()?;
357        if !output.status.success() {
358            return Ok(0);
359        }
360        let cutoff = chrono::Utc::now()
361            .timestamp()
362            .saturating_sub((older_than_days * 86_400) as i64);
363        let text = String::from_utf8_lossy(&output.stdout);
364        let to_delete: Vec<String> = text
365            .lines()
366            .filter_map(|line| {
367                let mut parts = line.splitn(2, ' ');
368                let refname = parts.next()?.trim().to_string();
369                let ts: i64 = parts.next()?.trim().parse().ok()?;
370                if ts < cutoff { Some(refname) } else { None }
371            })
372            .collect();
373        let count = to_delete.len();
374        for refname in &to_delete {
375            let _ = Command::new("git")
376                .args(["update-ref", "-d", refname])
377                .current_dir(&self.repo_path)
378                .status();
379        }
380        Ok(count)
381    }
382}
383
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an argument-less `edit` action with the given risk.
    fn action(risk: RiskLevel) -> ProposedAction {
        let tool_name = String::from("edit");
        let args = serde_json::json!({});
        ProposedAction {
            tool_name,
            risk,
            args,
        }
    }

    /// Trusted contracts allow mutating actions, but checkpoint and notify.
    #[test]
    fn trusted_mutating_verdict_requires_checkpoint_and_notify() {
        let contract = AutonomyContract::trusted();
        let proposed = action(RiskLevel::Mutating);

        let outcome = contract.evaluate(&proposed);

        assert_eq!(outcome.decision, Decision::Allow);
        assert!(outcome.needs_checkpoint);
        assert!(outcome.notify);
    }

    /// Read-only actions are allowed silently under supervision.
    #[test]
    fn supervised_readonly_verdict_needs_no_checkpoint() {
        let contract = AutonomyContract::supervised();
        let proposed = action(RiskLevel::ReadOnly);

        let outcome = contract.evaluate(&proposed);

        assert_eq!(outcome.decision, Decision::Allow);
        assert!(!outcome.needs_checkpoint);
        assert!(!outcome.notify);
    }

    /// A matching hard stop denies the action before the policy is consulted.
    #[test]
    fn hard_stop_verdict_denies_without_checkpoint() {
        let contract = AutonomyContract {
            stops: vec![HardStop::RiskLevel(RiskLevel::Destructive)],
            ..AutonomyContract::autonomous()
        };
        let proposed = action(RiskLevel::Destructive);

        let outcome = contract.evaluate(&proposed);

        assert_eq!(outcome.decision, Decision::Deny);
        assert!(!outcome.needs_checkpoint);
        assert!(!outcome.notify);
        assert!(outcome.reason.contains("hard stop"));
    }
}
425}