Skip to main content

mur_common/coordination/
conformance.rs

1//! Conformance testing framework (§10).
2//!
3//! Hosts implement [`ConformanceAdapter`] and pass [`PlanLoadingSuite`]
4//! (and future suites for Standard/Full levels) to prove conformance.
5//!
6//! The 10 plan-loading tests cover:
7//! 1. Valid minimal plan
8//! 2. Valid multi-step plan with dependencies
9//! 3. Optional fields default correctly
10//! 4. Unknown phase rejected
11//! 5. Empty phases rejected
12//! 6. Empty verify_command rejected
13//! 7. Unknown dependency rejected
14//! 8. Dependency cycle rejected
15//! 9. Phases out of SDLC order rejected
//! 10. Empty content_sha256 rejected
17
18use serde::{Deserialize, Serialize};
19use std::fmt;
20
21use super::plan::Plan;
22use super::types::ConformanceLevel;
23
/// Interface a host must implement to run the conformance suite.
///
/// The adapter is the **host's** bridge to the shared types. It provides:
/// - A TOML parser + validator (usually just `toml::from_str` + `Plan::validate`).
/// - The host's declared conformance level.
/// - A human-readable host name for test reports.
///
/// All methods take `&self`, and [`PlanLoadingSuite::run`] accepts the adapter
/// as `&dyn ConformanceAdapter`.
pub trait ConformanceAdapter {
    /// Parse a TOML string into a Plan and validate it.
    ///
    /// Returns the parsed Plan on success, or a list of human-readable
    /// error messages on failure. [`PlanLoadingSuite::run`] only inspects
    /// whether the result is `Ok` or `Err` (and, for one case, the fields of
    /// the returned plan); the error strings are echoed into the report when
    /// a case that should have succeeded fails.
    fn parse_and_validate(&self, toml: &str) -> Result<Plan, Vec<String>>;

    /// The conformance level this host claims.
    fn conformance_level(&self) -> ConformanceLevel;

    /// Human-readable host name for test reports.
    ///
    /// [`PlanLoadingSuite::run`] copies this value into
    /// [`ConformanceReport::host_name`].
    fn host_name(&self) -> &str;
}
43
/// Result of running a conformance suite.
///
/// Carries the summary counters plus one [`TestFailure`] per failing test.
/// All fields are public and the type is (de)serializable through serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConformanceReport {
    /// Name of the suite that was run.
    pub suite_name: String,
    /// Host name (from [`ConformanceAdapter::host_name`]).
    pub host_name: String,
    /// Total tests in the suite.
    pub total: usize,
    /// Number of tests that passed.
    pub passed: usize,
    /// Number of tests that failed.
    pub failed: usize,
    /// Per-failure details (empty if all passed).
    pub failures: Vec<TestFailure>,
}
60
/// Details of a single failed conformance test, as listed in
/// [`ConformanceReport::failures`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TestFailure {
    /// Name of the test case that failed.
    pub test_name: String,
    /// Human-readable description of why the test failed.
    pub error: String,
}
66
67impl ConformanceReport {
68    /// Did every test in the suite pass?
69    pub fn all_passed(&self) -> bool {
70        self.failures.is_empty()
71    }
72}
73
74impl fmt::Display for ConformanceReport {
75    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
76        writeln!(
77            f,
78            "{} — {}: {}/{} passed",
79            self.suite_name, self.host_name, self.passed, self.total
80        )?;
81        for failure in &self.failures {
82            writeln!(f, "  FAIL {}: {}", failure.test_name, failure.error)?;
83        }
84        Ok(())
85    }
86}
87
/// The Minimal-conformance plan-loading test suite.
///
/// Contains 10 self-contained tests that exercise Plan TOML parsing
/// and validation. Each test is a TOML string + an assertion about
/// whether it should parse+validate successfully.
///
/// Build it with [`PlanLoadingSuite::new`] (or `Default`) and execute it with
/// [`PlanLoadingSuite::run`].
pub struct PlanLoadingSuite {
    /// The built-in test cases, executed in order by `run`.
    cases: Vec<PlanLoadingCase>,
}
96
/// One plan-loading test: a TOML document plus the expected outcome.
struct PlanLoadingCase {
    /// Test name, reported in `TestFailure::test_name` when the case fails.
    name: &'static str,
    /// The TOML document handed to `ConformanceAdapter::parse_and_validate`.
    toml: &'static str,
    /// Whether the adapter is expected to accept (`true`) or reject (`false`)
    /// the document.
    should_pass: bool,
    /// If should_pass, also verify the parsed plan has the expected field values.
    // The boxed-closure type is presumably what trips clippy's
    // `type_complexity` lint, hence the allow.
    #[allow(clippy::type_complexity)]
    check_fields: Option<Box<dyn Fn(&Plan) -> Result<(), String>>>,
}
105
106impl PlanLoadingSuite {
107    pub fn new() -> Self {
108        Self {
109            cases: vec![
110                // 1. Valid minimal plan
111                PlanLoadingCase {
112                    name: "valid_minimal_plan",
113                    toml: r#"
114[plan]
115version = "0"
116plan_id = "550e8400-e29b-41d4-a716-446655440000"
117goal = "test"
118created_at = "2026-05-24T12:00:00Z"
119created_by = "agent:test"
120budget_estimate_usd = 0.0
121determinism = "best-effort"
122content_sha256 = "a"
123
124[[plan.steps]]
125step_id = "s1"
126description = "test"
127agent_hint = "generic"
128phases = ["verify"]
129verify_command = "true"
130depends_on = []
131"#,
132                    should_pass: true,
133                    check_fields: None,
134                },
135                // 2. Valid multi-step plan with dependencies
136                PlanLoadingCase {
137                    name: "valid_multi_step_with_deps",
138                    toml: r#"
139[plan]
140version = "0"
141plan_id = "550e8400-e29b-41d4-a716-446655440000"
142goal = "multi-step"
143created_at = "2026-05-24T12:00:00Z"
144created_by = "agent:test"
145budget_estimate_usd = 1.00
146determinism = "strict"
147content_sha256 = "b"
148
149[[plan.steps]]
150step_id = "build"
151description = "build the project"
152agent_hint = "generic"
153phases = ["plan", "implement", "verify"]
154verify_command = "cargo build"
155depends_on = []
156
157[[plan.steps]]
158step_id = "test"
159description = "run tests"
160agent_hint = "code-review"
161phases = ["test", "verify"]
162verify_command = "cargo test"
163depends_on = ["build"]
164
165[[plan.steps]]
166step_id = "deploy"
167description = "deploy"
168agent_hint = "generic"
169phases = ["verify"]
170verify_command = "curl localhost/health"
171depends_on = ["test"]
172"#,
173                    should_pass: true,
174                    check_fields: None,
175                },
176                // 3. Optional fields default correctly
177                PlanLoadingCase {
178                    name: "optional_fields_default",
179                    toml: r#"
180[plan]
181version = "0"
182plan_id = "550e8400-e29b-41d4-a716-446655440000"
183goal = "test defaults"
184created_at = "2026-05-24T12:00:00Z"
185created_by = "agent:test"
186budget_estimate_usd = 0.0
187determinism = "best-effort"
188content_sha256 = "c"
189
190[[plan.steps]]
191step_id = "s1"
192description = "test"
193agent_hint = "generic"
194phases = ["verify"]
195verify_command = "true"
196depends_on = []
197"#,
198                    should_pass: true,
199                    check_fields: Some(Box::new(|plan: &Plan| {
200                        if plan.plan.signature.is_some() {
201                            return Err("signature should be None by default".into());
202                        }
203                        if plan.plan.steps[0].skill_ref.is_some() {
204                            return Err("skill_ref should be None by default".into());
205                        }
206                        if plan.plan.max_escalations != 3 {
207                            return Err(format!(
208                                "max_escalations should default to 3, got {}",
209                                plan.plan.max_escalations
210                            ));
211                        }
212                        Ok(())
213                    })),
214                },
215                // 4. Unknown phase rejected
216                PlanLoadingCase {
217                    name: "reject_unknown_phase",
218                    toml: r#"
219[plan]
220version = "0"
221plan_id = "550e8400-e29b-41d4-a716-446655440000"
222goal = "bad phase"
223created_at = "2026-05-24T12:00:00Z"
224created_by = "agent:test"
225budget_estimate_usd = 0.0
226determinism = "best-effort"
227content_sha256 = "d"
228
229[[plan.steps]]
230step_id = "s1"
231description = "bad phase"
232agent_hint = "generic"
233phases = ["bogus_phase"]
234verify_command = "true"
235depends_on = []
236"#,
237                    should_pass: false,
238                    check_fields: None,
239                },
240                // 5. Empty phases rejected
241                PlanLoadingCase {
242                    name: "reject_empty_phases",
243                    toml: r#"
244[plan]
245version = "0"
246plan_id = "550e8400-e29b-41d4-a716-446655440000"
247goal = "no phases"
248created_at = "2026-05-24T12:00:00Z"
249created_by = "agent:test"
250budget_estimate_usd = 0.0
251determinism = "best-effort"
252content_sha256 = "e"
253
254[[plan.steps]]
255step_id = "s1"
256description = "no phases"
257agent_hint = "generic"
258phases = []
259verify_command = "true"
260depends_on = []
261"#,
262                    should_pass: false,
263                    check_fields: None,
264                },
265                // 6. Empty verify_command rejected
266                PlanLoadingCase {
267                    name: "reject_empty_verify_command",
268                    toml: r#"
269[plan]
270version = "0"
271plan_id = "550e8400-e29b-41d4-a716-446655440000"
272goal = "no verify"
273created_at = "2026-05-24T12:00:00Z"
274created_by = "agent:test"
275budget_estimate_usd = 0.0
276determinism = "best-effort"
277content_sha256 = "f"
278
279[[plan.steps]]
280step_id = "s1"
281description = "no verify"
282agent_hint = "generic"
283phases = ["verify"]
284verify_command = ""
285depends_on = []
286"#,
287                    should_pass: false,
288                    check_fields: None,
289                },
290                // 7. Unknown dependency rejected
291                PlanLoadingCase {
292                    name: "reject_unknown_dependency",
293                    toml: r#"
294[plan]
295version = "0"
296plan_id = "550e8400-e29b-41d4-a716-446655440000"
297goal = "bad dep"
298created_at = "2026-05-24T12:00:00Z"
299created_by = "agent:test"
300budget_estimate_usd = 0.0
301determinism = "best-effort"
302content_sha256 = "g"
303
304[[plan.steps]]
305step_id = "s1"
306description = "bad dep"
307agent_hint = "generic"
308phases = ["verify"]
309verify_command = "true"
310depends_on = ["nonexistent"]
311"#,
312                    should_pass: false,
313                    check_fields: None,
314                },
315                // 8. Dependency cycle rejected
316                PlanLoadingCase {
317                    name: "reject_dependency_cycle",
318                    toml: r#"
319[plan]
320version = "0"
321plan_id = "550e8400-e29b-41d4-a716-446655440000"
322goal = "cycle"
323created_at = "2026-05-24T12:00:00Z"
324created_by = "agent:test"
325budget_estimate_usd = 0.0
326determinism = "best-effort"
327content_sha256 = "h"
328
329[[plan.steps]]
330step_id = "s1"
331description = "step 1"
332agent_hint = "generic"
333phases = ["verify"]
334verify_command = "true"
335depends_on = ["s2"]
336
337[[plan.steps]]
338step_id = "s2"
339description = "step 2"
340agent_hint = "generic"
341phases = ["verify"]
342verify_command = "true"
343depends_on = ["s1"]
344"#,
345                    should_pass: false,
346                    check_fields: None,
347                },
348                // 9. Phases out of SDLC order rejected
349                PlanLoadingCase {
350                    name: "reject_phases_out_of_order",
351                    toml: r#"
352[plan]
353version = "0"
354plan_id = "550e8400-e29b-41d4-a716-446655440000"
355goal = "bad order"
356created_at = "2026-05-24T12:00:00Z"
357created_by = "agent:test"
358budget_estimate_usd = 0.0
359determinism = "best-effort"
360content_sha256 = "i"
361
362[[plan.steps]]
363step_id = "s1"
364description = "bad order"
365agent_hint = "generic"
366phases = ["verify", "plan", "implement"]
367verify_command = "true"
368depends_on = []
369"#,
370                    should_pass: false,
371                    check_fields: None,
372                },
373                // 10. content_sha256 must be non-empty
374                PlanLoadingCase {
375                    name: "reject_empty_content_hash",
376                    toml: r#"
377[plan]
378version = "0"
379plan_id = "550e8400-e29b-41d4-a716-446655440000"
380goal = "no hash"
381created_at = "2026-05-24T12:00:00Z"
382created_by = "agent:test"
383budget_estimate_usd = 0.0
384determinism = "best-effort"
385content_sha256 = ""
386
387[[plan.steps]]
388step_id = "s1"
389description = "test"
390agent_hint = "generic"
391phases = ["verify"]
392verify_command = "true"
393depends_on = []
394"#,
395                    should_pass: false,
396                    check_fields: None,
397                },
398            ],
399        }
400    }
401
402    /// Run all 10 tests against the given adapter.
403    pub fn run(&self, adapter: &dyn ConformanceAdapter) -> ConformanceReport {
404        let mut failures = Vec::new();
405
406        for case in &self.cases {
407            let result = adapter.parse_and_validate(case.toml);
408            match (case.should_pass, result) {
409                (true, Ok(ref plan)) => {
410                    if let Some(ref check) = case.check_fields
411                        && let Err(err) = check(plan)
412                    {
413                        failures.push(TestFailure {
414                            test_name: case.name.to_string(),
415                            error: format!("field check failed: {}", err),
416                        });
417                    }
418                }
419                (true, Err(errs)) => {
420                    failures.push(TestFailure {
421                        test_name: case.name.to_string(),
422                        error: format!("expected success, got errors: {:?}", errs),
423                    });
424                }
425                (false, Ok(_)) => {
426                    failures.push(TestFailure {
427                        test_name: case.name.to_string(),
428                        error: "expected validation failure, got success".to_string(),
429                    });
430                }
431                (false, Err(_)) => {
432                    // Expected failure — correct.
433                }
434            }
435        }
436
437        let total = self.cases.len();
438        let failed_count = failures.len();
439        ConformanceReport {
440            suite_name: "PlanLoadingSuite".to_string(),
441            host_name: adapter.host_name().to_string(),
442            total,
443            passed: total - failed_count,
444            failed: failed_count,
445            failures,
446        }
447    }
448}
449
450impl Default for PlanLoadingSuite {
451    fn default() -> Self {
452        Self::new()
453    }
454}
455
#[cfg(test)]
mod tests {
    use super::*;
    use crate::coordination::plan::Plan;
    use crate::coordination::types::ConformanceLevel;

    /// Reference adapter for the self-test: deserializes the TOML with
    /// `toml::from_str` and then runs `Plan::validate` on the result.
    struct StaticAdapter {}

    impl ConformanceAdapter for StaticAdapter {
        fn parse_and_validate(&self, toml: &str) -> Result<Plan, Vec<String>> {
            // A deserialization error is reported as a single message.
            let parsed: Plan = match toml::from_str(toml) {
                Ok(plan) => plan,
                Err(parse_err) => return Err(vec![parse_err.to_string()]),
            };

            // Any validation finding turns the whole result into an error.
            let problems = parsed.validate();
            if problems.is_empty() {
                Ok(parsed)
            } else {
                Err(problems.iter().map(|p| p.to_string()).collect())
            }
        }

        fn conformance_level(&self) -> ConformanceLevel {
            ConformanceLevel::Minimal
        }

        fn host_name(&self) -> &str {
            "static-test-adapter"
        }
    }

    /// The shared types themselves must pass the plan-loading suite.
    #[test]
    fn test_conformance_minimal_plan_loading_passes() {
        let report = PlanLoadingSuite::new().run(&StaticAdapter {});
        assert!(
            report.failures.is_empty(),
            "all plan-loading tests must pass: {:?}",
            report.failures
        );
    }
}