car_verify/
lib.rs

1//! Formal verification for Agent IR.
2//!
3//! Given a state S and proposal P, you can:
4//! 1. **verify**: Prove P is satisfiable in S without executing
5//! 2. **simulate**: Compute expected final state S' without tools
6//! 3. **equivalent**: Show two proposals produce identical state
7//! 4. **optimize**: Reorder actions provably safely
8
9use car_ir::precondition::{self, StateView};
10use car_ir::{build_dag, Action, ActionProposal, ActionType};
11use serde_json::Value;
12use std::collections::{HashMap, HashSet};
13
14/// Symbolic state for static analysis.
15#[derive(Debug, Clone)]
16pub struct StaticState {
17    pub known: HashMap<String, Value>,
18    pub unknown_keys: HashSet<String>,
19}
20
21impl StaticState {
22    pub fn new() -> Self {
23        Self {
24            known: HashMap::new(),
25            unknown_keys: HashSet::new(),
26        }
27    }
28
29    pub fn from_map(map: HashMap<String, Value>) -> Self {
30        Self {
31            known: map,
32            unknown_keys: HashSet::new(),
33        }
34    }
35
36    pub fn get(&self, key: &str) -> Option<&Value> {
37        self.known.get(key)
38    }
39
40    pub fn exists(&self, key: &str) -> bool {
41        self.known.contains_key(key)
42    }
43
44    pub fn is_unknown(&self, key: &str) -> bool {
45        self.unknown_keys.contains(key)
46    }
47
48    pub fn set(&mut self, key: &str, value: Value) {
49        self.known.insert(key.to_string(), value);
50        self.unknown_keys.remove(key);
51    }
52}
53
54impl Default for StaticState {
55    fn default() -> Self {
56        Self::new()
57    }
58}
59
60impl StateView for StaticState {
61    fn get_value(&self, key: &str) -> Option<Value> {
62        self.known.get(key).cloned()
63    }
64    fn key_exists(&self, key: &str) -> bool {
65        self.known.contains_key(key)
66    }
67    fn is_unknown(&self, key: &str) -> bool {
68        self.unknown_keys.contains(key)
69    }
70}
71
72/// A single verification finding.
73#[derive(Debug, Clone)]
74pub struct VerifyIssue {
75    pub action_id: String,
76    pub severity: String, // "error", "warning", "info"
77    pub message: String,
78}
79
80/// Complete verification result.
81#[derive(Debug)]
82pub struct VerifyResult {
83    pub valid: bool,
84    pub issues: Vec<VerifyIssue>,
85    pub simulated_state: HashMap<String, Value>,
86    pub execution_levels: Vec<Vec<String>>,
87    pub conflicts: Vec<(String, String, String)>, // (action1, action2, key)
88}
89
90impl VerifyResult {
91    pub fn errors(&self) -> Vec<&VerifyIssue> {
92        self.issues
93            .iter()
94            .filter(|i| i.severity == "error")
95            .collect()
96    }
97
98    pub fn warnings(&self) -> Vec<&VerifyIssue> {
99        self.issues
100            .iter()
101            .filter(|i| i.severity == "warning")
102            .collect()
103    }
104}
105
106// --- Action effects (symbolic) ---
107
108fn apply_action_effects(action: &Action, state: &mut StaticState) {
109    if action.action_type == ActionType::StateWrite {
110        if let Some(key) = action.parameters.get("key").and_then(|v| v.as_str()) {
111            let value = action
112                .parameters
113                .get("value")
114                .cloned()
115                .unwrap_or(Value::Null);
116            state.set(key, value);
117        }
118    }
119    for (key, value) in &action.expected_effects {
120        state.set(key, value.clone());
121    }
122}
123
124// --- Conflict detection ---
125
126fn detect_conflicts(actions: &[Action]) -> Vec<(String, String, String)> {
127    let mut writers: HashMap<String, Vec<String>> = HashMap::new();
128
129    for action in actions {
130        let mut keys_written = HashSet::new();
131        if action.action_type == ActionType::StateWrite {
132            if let Some(k) = action.parameters.get("key").and_then(|v| v.as_str()) {
133                keys_written.insert(k.to_string());
134            }
135        }
136        for key in action.expected_effects.keys() {
137            keys_written.insert(key.clone());
138        }
139        for key in keys_written {
140            writers.entry(key).or_default().push(action.id.clone());
141        }
142    }
143
144    let dep_map: HashMap<String, HashSet<String>> = actions
145        .iter()
146        .map(|a| (a.id.clone(), a.state_dependencies.iter().cloned().collect()))
147        .collect();
148
149    let mut conflicts = Vec::new();
150    for (key, action_ids) in &writers {
151        if action_ids.len() < 2 {
152            continue;
153        }
154        for i in 0..action_ids.len() {
155            for j in (i + 1)..action_ids.len() {
156                let a1 = &action_ids[i];
157                let a2 = &action_ids[j];
158                let deps_a2 = dep_map.get(a2).cloned().unwrap_or_default();
159                let deps_a1 = dep_map.get(a1).cloned().unwrap_or_default();
160                if !deps_a2.contains(key) && !deps_a1.contains(key) {
161                    conflicts.push((a1.clone(), a2.clone(), key.clone()));
162                }
163            }
164        }
165    }
166    conflicts
167}
168
169// --- Core verification ---
170
171/// Statically verify a proposal against an initial state.
172pub fn verify(
173    proposal: &ActionProposal,
174    initial_state: Option<&HashMap<String, Value>>,
175    registered_tools: Option<&HashSet<String>>,
176    max_actions: usize,
177) -> VerifyResult {
178    let mut state = match initial_state {
179        Some(s) => StaticState::from_map(s.clone()),
180        None => StaticState::new(),
181    };
182    let mut issues = Vec::new();
183
184    // Resource bounds
185    if proposal.actions.len() > max_actions {
186        issues.push(VerifyIssue {
187            action_id: proposal
188                .actions
189                .first()
190                .map(|a| a.id.clone())
191                .unwrap_or_default(),
192            severity: "warning".to_string(),
193            message: format!(
194                "excessive actions: {} (limit {})",
195                proposal.actions.len(),
196                max_actions
197            ),
198        });
199    }
200
201    // Loop detection
202    let mut seen_calls: HashMap<String, u32> = HashMap::new();
203    for action in &proposal.actions {
204        if action.action_type == ActionType::ToolCall {
205            if let Some(ref tool) = action.tool {
206                let params = serde_json::to_string(&action.parameters).unwrap_or_default();
207                let key = format!("{}:{}", tool, params);
208                *seen_calls.entry(key).or_insert(0) += 1;
209            }
210        }
211    }
212    for (call_key, count) in &seen_calls {
213        let tool_name = call_key.split(':').next().unwrap_or("?");
214        if *count >= 3 {
215            issues.push(VerifyIssue {
216                action_id: "proposal".to_string(),
217                severity: "error".to_string(),
218                message: format!(
219                    "repeated identical tool call: {} ({}x) — likely loop",
220                    tool_name, count
221                ),
222            });
223        } else if *count == 2 {
224            issues.push(VerifyIssue {
225                action_id: "proposal".to_string(),
226                severity: "warning".to_string(),
227                message: format!("duplicate tool call: {} ({}x)", tool_name, count),
228            });
229        }
230    }
231
232    // Build DAG
233    let levels = build_dag(&proposal.actions);
234    let execution_levels: Vec<Vec<String>> = levels
235        .iter()
236        .map(|level| {
237            level
238                .iter()
239                .map(|&i| proposal.actions[i].id.clone())
240                .collect()
241        })
242        .collect();
243
244    // Walk in topological order
245    for level in &levels {
246        for &idx in level {
247            let action = &proposal.actions[idx];
248
249            // Check preconditions
250            for pre in &action.preconditions {
251                if let Some(error) = precondition::check_precondition(pre, &state) {
252                    issues.push(VerifyIssue {
253                        action_id: action.id.clone(),
254                        severity: "error".to_string(),
255                        message: format!("precondition will fail: {}", error),
256                    });
257                }
258            }
259
260            // State dependencies
261            for dep in &action.state_dependencies {
262                if !state.exists(dep) && !state.is_unknown(dep) {
263                    issues.push(VerifyIssue {
264                        action_id: action.id.clone(),
265                        severity: "error".to_string(),
266                        message: format!("state dependency '{}' not available at this point", dep),
267                    });
268                }
269            }
270
271            // Tool existence
272            if action.action_type == ActionType::ToolCall {
273                if let Some(ref tool) = action.tool {
274                    if let Some(tools) = registered_tools {
275                        if !tools.contains(tool.as_str()) {
276                            issues.push(VerifyIssue {
277                                action_id: action.id.clone(),
278                                severity: "error".to_string(),
279                                message: format!("tool '{}' is not registered", tool),
280                            });
281                        }
282                    }
283                } else {
284                    issues.push(VerifyIssue {
285                        action_id: action.id.clone(),
286                        severity: "error".to_string(),
287                        message: "tool_call action has no tool specified".to_string(),
288                    });
289                }
290            }
291
292            apply_action_effects(action, &mut state);
293        }
294    }
295
296    // Conflicts
297    let conflicts = detect_conflicts(&proposal.actions);
298    for (a1, a2, key) in &conflicts {
299        issues.push(VerifyIssue {
300            action_id: a1.clone(),
301            severity: "warning".to_string(),
302            message: format!(
303                "write conflict on '{}' with action {} (no dependency declared)",
304                key, a2
305            ),
306        });
307    }
308
309    let has_errors = issues.iter().any(|i| i.severity == "error");
310
311    VerifyResult {
312        valid: !has_errors,
313        issues,
314        simulated_state: state.known,
315        execution_levels,
316        conflicts,
317    }
318}
319
320/// Simulate a proposal's state effects without executing tools.
321pub fn simulate(
322    proposal: &ActionProposal,
323    initial_state: Option<&HashMap<String, Value>>,
324) -> HashMap<String, Value> {
325    verify(proposal, initial_state, None, usize::MAX).simulated_state
326}
327
328/// Test if two proposals produce identical state transitions.
329pub fn equivalent(
330    p1: &ActionProposal,
331    p2: &ActionProposal,
332    test_states: Option<&[HashMap<String, Value>]>,
333) -> bool {
334    let defaults = vec![
335        HashMap::new(),
336        [
337            ("x".to_string(), Value::from(1)),
338            ("y".to_string(), Value::from(2)),
339        ]
340        .into(),
341    ];
342    let states = test_states.unwrap_or(&defaults);
343
344    for state in states {
345        let s1 = simulate(p1, Some(state));
346        let s2 = simulate(p2, Some(state));
347        if s1 != s2 {
348            return false;
349        }
350    }
351    true
352}
353
354/// Optimize a proposal: remove phantom dependencies to enable more parallelism.
355pub fn optimize(proposal: &ActionProposal) -> ActionProposal {
356    // Find which keys are actually written
357    let mut written_keys = HashSet::new();
358    for action in &proposal.actions {
359        if action.action_type == ActionType::StateWrite {
360            if let Some(k) = action.parameters.get("key").and_then(|v| v.as_str()) {
361                written_keys.insert(k.to_string());
362            }
363        }
364        for key in action.expected_effects.keys() {
365            written_keys.insert(key.clone());
366        }
367    }
368
369    let optimized_actions: Vec<Action> = proposal
370        .actions
371        .iter()
372        .map(|action| {
373            let pruned: Vec<String> = action
374                .state_dependencies
375                .iter()
376                .filter(|d| written_keys.contains(d.as_str()))
377                .cloned()
378                .collect();
379
380            if pruned.len() != action.state_dependencies.len() {
381                let mut new_action = action.clone();
382                new_action.state_dependencies = pruned;
383                new_action
384            } else {
385                action.clone()
386            }
387        })
388        .collect();
389
390    ActionProposal {
391        id: proposal.id.clone(),
392        source: proposal.source.clone(),
393        actions: optimized_actions,
394        timestamp: proposal.timestamp,
395        context: proposal.context.clone(),
396    }
397}
398
399#[cfg(test)]
400mod tests {
401    use super::*;
402    use car_ir::{FailureBehavior, Precondition};
403
404    fn tool_call(id: &str, tool: &str) -> Action {
405        Action {
406            id: id.to_string(),
407            action_type: ActionType::ToolCall,
408            tool: Some(tool.to_string()),
409            parameters: HashMap::new(),
410            preconditions: vec![],
411            expected_effects: HashMap::new(),
412            state_dependencies: vec![],
413            idempotent: false,
414            max_retries: 3,
415            failure_behavior: FailureBehavior::Abort,
416            timeout_ms: None,
417            metadata: HashMap::new(),
418        }
419    }
420
421    fn state_write(id: &str, key: &str, value: Value) -> Action {
422        Action {
423            id: id.to_string(),
424            action_type: ActionType::StateWrite,
425            tool: None,
426            parameters: [
427                ("key".to_string(), Value::from(key)),
428                ("value".to_string(), value),
429            ]
430            .into(),
431            preconditions: vec![],
432            expected_effects: HashMap::new(),
433            state_dependencies: vec![],
434            idempotent: false,
435            max_retries: 3,
436            failure_behavior: FailureBehavior::Abort,
437            timeout_ms: None,
438            metadata: HashMap::new(),
439        }
440    }
441
442    fn prop(actions: Vec<Action>) -> ActionProposal {
443        ActionProposal {
444            id: "test".to_string(),
445            source: "test".to_string(),
446            actions,
447            timestamp: chrono::Utc::now(),
448            context: HashMap::new(),
449        }
450    }
451
452    #[test]
453    fn verify_valid_proposal() {
454        let p = prop(vec![state_write("a1", "x", Value::from(1)), {
455            let mut a = tool_call("a2", "search");
456            a.state_dependencies = vec!["x".to_string()];
457            a
458        }]);
459        let r = verify(&p, None, Some(&["search".to_string()].into()), 30);
460        assert!(r.valid);
461    }
462
463    #[test]
464    fn verify_catches_unsatisfied_precondition() {
465        let mut a = tool_call("a1", "deploy");
466        a.preconditions = vec![Precondition {
467            key: "tests_passed".to_string(),
468            operator: "eq".to_string(),
469            value: Value::Bool(true),
470            description: String::new(),
471        }];
472        let r = verify(&prop(vec![a]), None, None, 30);
473        assert!(!r.valid);
474    }
475
476    #[test]
477    fn verify_precondition_satisfied_by_earlier_action() {
478        let mut a2 = tool_call("a2", "deploy");
479        a2.preconditions = vec![Precondition {
480            key: "ready".to_string(),
481            operator: "eq".to_string(),
482            value: Value::Bool(true),
483            description: String::new(),
484        }];
485        a2.state_dependencies = vec!["ready".to_string()];
486
487        let p = prop(vec![state_write("a1", "ready", Value::Bool(true)), a2]);
488        let r = verify(&p, None, None, 30);
489        assert!(r.valid);
490    }
491
492    #[test]
493    fn verify_missing_state_dependency() {
494        let mut a = tool_call("a1", "x");
495        a.state_dependencies = vec!["nonexistent".to_string()];
496        let r = verify(&prop(vec![a]), None, None, 30);
497        assert!(!r.valid);
498    }
499
500    #[test]
501    fn verify_tool_not_registered() {
502        let a = tool_call("a1", "quantum");
503        let r = verify(&prop(vec![a]), None, Some(&HashSet::new()), 30);
504        assert!(!r.valid);
505    }
506
507    #[test]
508    fn verify_no_tool_specified() {
509        let mut a = tool_call("a1", "x");
510        a.tool = None;
511        let r = verify(&prop(vec![a]), None, None, 30);
512        assert!(!r.valid);
513    }
514
515    #[test]
516    fn detect_write_conflict() {
517        let p = prop(vec![
518            state_write("a1", "x", Value::from(1)),
519            state_write("a2", "x", Value::from(2)),
520        ]);
521        let r = verify(&p, None, None, 30);
522        assert!(!r.conflicts.is_empty());
523    }
524
525    #[test]
526    fn simulate_state_writes() {
527        let p = prop(vec![
528            state_write("a1", "x", Value::from(10)),
529            state_write("a2", "y", Value::from(20)),
530        ]);
531        let s = simulate(&p, None);
532        assert_eq!(s.get("x"), Some(&Value::from(10)));
533        assert_eq!(s.get("y"), Some(&Value::from(20)));
534    }
535
536    #[test]
537    fn equivalent_proposals() {
538        let p1 = prop(vec![
539            state_write("a1", "x", Value::from(1)),
540            state_write("a2", "y", Value::from(2)),
541        ]);
542        let p2 = prop(vec![
543            state_write("b1", "y", Value::from(2)),
544            state_write("b2", "x", Value::from(1)),
545        ]);
546        assert!(equivalent(&p1, &p2, None));
547    }
548
549    #[test]
550    fn non_equivalent_proposals() {
551        let p1 = prop(vec![state_write("a1", "x", Value::from(1))]);
552        let p2 = prop(vec![state_write("b1", "x", Value::from(99))]);
553        assert!(!equivalent(&p1, &p2, None));
554    }
555
556    #[test]
557    fn optimize_removes_phantom_deps() {
558        let mut a = tool_call("a1", "search");
559        a.state_dependencies = vec!["phantom".to_string()];
560        let p = prop(vec![a]);
561        let optimized = optimize(&p);
562        assert!(optimized.actions[0].state_dependencies.is_empty());
563    }
564
565    #[test]
566    fn optimize_preserves_real_deps() {
567        let mut a2 = tool_call("a2", "x");
568        a2.state_dependencies = vec!["x".to_string()];
569        let p = prop(vec![state_write("a1", "x", Value::from(1)), a2]);
570        let optimized = optimize(&p);
571        assert_eq!(optimized.actions[1].state_dependencies, vec!["x"]);
572    }
573
574    #[test]
575    fn loop_detection_duplicates() {
576        let p = prop(vec![tool_call("a1", "search"), tool_call("a2", "search")]);
577        let r = verify(&p, None, None, 30);
578        assert!(r.issues.iter().any(|i| i.message.contains("duplicate")));
579    }
580
581    #[test]
582    fn loop_detection_triple() {
583        let p = prop(vec![
584            tool_call("a1", "search"),
585            tool_call("a2", "search"),
586            tool_call("a3", "search"),
587        ]);
588        let r = verify(&p, None, None, 30);
589        assert!(!r.valid);
590        assert!(r.issues.iter().any(|i| i.message.contains("likely loop")));
591    }
592
593    #[test]
594    fn resource_bounds() {
595        let actions: Vec<Action> = (0..35)
596            .map(|i| tool_call(&format!("a{}", i), &format!("t{}", i)))
597            .collect();
598        let r = verify(&prop(actions), None, None, 30);
599        assert!(r.issues.iter().any(|i| i.message.contains("excessive")));
600    }
601}
car_verify/lib.rs

car_verify/
lib.rs