Skip to main content

exo_consensus/
lib.rs

1// Copyright 2026 Exochain Foundation
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at:
6//
7//     https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// SPDX-License-Identifier: Apache-2.0
16
17#![cfg_attr(test, allow(clippy::expect_used, clippy::unwrap_used))]
18
19pub mod advocate;
20pub mod commitment;
21pub mod error;
22pub mod panel;
23pub mod record;
24pub mod report;
25pub mod round;
26pub mod scoring;
27pub mod session;
28
29pub use commitment::{commit, commit_response, verify_commitment, verify_response_commitment};
30pub use error::{ConsensusError, Result};
31pub use panel::{ModelProvider, ModelRole, Panel, PanelModel};
32pub use record::DeliberationResult;
33pub use report::MinorityReport;
34pub use round::{DeliberationRound, DevilAdvocateReview, ModelDeliberationResponse, ModelPosition};
35pub use scoring::{
36    PanelConfidenceInputs, calculate_convergence, calculate_panel_confidence, canonical_claim_set,
37    consensus_claims_at_threshold,
38};
39pub use session::{
40    DeliberationSession, DeterministicResponseProvider, FinalizationTiming, RoundExecutionTiming,
41};
42
43#[cfg(test)]
44mod tests {
45    use std::collections::BTreeMap;
46
47    use decision_forum::decision_object::DecisionClass;
48    use exo_core::types::Timestamp;
49    use serde::Serialize;
50
51    use super::*;
52
53    fn round_timing(round: u64) -> RoundExecutionTiming {
54        RoundExecutionTiming {
55            submitted_at: Timestamp::new(round * 10, 0),
56            revealed_at: Timestamp::new(round * 10, 1),
57        }
58    }
59
60    fn finalization_timing() -> FinalizationTiming {
61        FinalizationTiming {
62            completed_at: Timestamp::new(1000, 0),
63        }
64    }
65
66    fn response(text: &str, claims: &[&str]) -> ModelDeliberationResponse {
67        ModelDeliberationResponse {
68            position_text: text.to_string(),
69            key_claims: claims.iter().map(|claim| (*claim).to_string()).collect(),
70            confidence_bps: 8000,
71        }
72    }
73
74    fn routine_response_provider(
75        response_text: &str,
76        claims: &[&str],
77    ) -> DeterministicResponseProvider {
78        DeterministicResponseProvider::with_positions(routine_panel_responses(
79            response_text,
80            claims,
81        ))
82    }
83
84    fn operational_response_provider(
85        response_text: &str,
86        claims: &[&str],
87    ) -> DeterministicResponseProvider {
88        DeterministicResponseProvider::new(
89            BTreeMap::from([
90                (
91                    "claude-3-5-sonnet".to_string(),
92                    response(response_text, claims),
93                ),
94                ("gpt-4o".to_string(), response(response_text, claims)),
95                (
96                    "gemini-1.5-pro".to_string(),
97                    response(response_text, claims),
98                ),
99            ]),
100            BTreeMap::from([("gpt-4o".to_string(), neutral_review())]),
101        )
102    }
103
104    fn neutral_review() -> DevilAdvocateReview {
105        DevilAdvocateReview {
106            review_text: "No threshold objection found.".to_string(),
107            serious_objection: false,
108            reasons: Vec::new(),
109        }
110    }
111
112    // 1. test_convergence_identical_positions
113    #[test]
114    fn test_convergence_identical_positions() {
115        let pos = vec![
116            vec![
117                "claim1".to_string(),
118                "claim2".to_string(),
119                "claim3".to_string(),
120            ],
121            vec![
122                "claim1".to_string(),
123                "claim2".to_string(),
124                "claim3".to_string(),
125            ],
126        ];
127        let score = calculate_convergence(&pos);
128        assert_eq!(score, 10000);
129    }
130
131    // 2. test_convergence_zero_overlap
132    #[test]
133    fn test_convergence_zero_overlap() {
134        let pos = vec![
135            vec!["claim1".to_string(), "claim2".to_string()],
136            vec!["claim3".to_string(), "claim4".to_string()],
137        ];
138        let score = calculate_convergence(&pos);
139        assert_eq!(score, 0);
140    }
141
142    // 3. test_convergence_partial_overlap
143    #[test]
144    fn test_convergence_partial_overlap() {
145        // "claim1" is shared, "claim2", "claim3", "claim4", "claim5" are not. Total unique: 5.
146        // Shared: 1. Wait, let's just make it simple:
147        let pos = vec![
148            vec!["A".to_string(), "B".to_string()],
149            vec!["A".to_string(), "C".to_string()],
150        ];
151        let score = calculate_convergence(&pos);
152        // Unique claims: a, b, c (3). Shared: a (1).
153        // Score = 1/3 * 10000 = 3333.
154        // Let's adjust expected based on logic.
155        assert_eq!(score, 3333);
156
157        // For exactly 50%: "A, B", "A, B, C, D" => Wait.
158        let pos2 = vec![
159            vec!["A".to_string(), "B".to_string()],
160            vec![
161                "A".to_string(),
162                "B".to_string(),
163                "C".to_string(),
164                "D".to_string(),
165            ],
166        ];
167        let score2 = calculate_convergence(&pos2);
168        // unique: a, b, c, d (4). Shared: a, b (2).
169        // 2/4 = 5000
170        assert_eq!(score2, 5000);
171    }
172
173    // 4. test_panel_confidence_unanimous_fast
174    #[test]
175    fn test_panel_confidence_unanimous_fast() {
176        let inputs = PanelConfidenceInputs {
177            models_agreeing: 3,
178            total_models: 3,
179            converged: true,
180            rounds_to_convergence: 1,
181            max_rounds: 3,
182            devil_found_serious_objection: false,
183            minority_reports_count: 0,
184        };
185        let pci = calculate_panel_confidence(&inputs);
186        // agreement = 5000
187        // speed = ((3 - 1 + 1) / 3) * 3000 = 3/3 * 3000 = 3000
188        // advocate = 2000
189        // total = 10000
190        assert_eq!(pci, 10000);
191    }
192
193    // 5. test_panel_confidence_split_slow
194    #[test]
195    fn test_panel_confidence_split_slow() {
196        let inputs = PanelConfidenceInputs {
197            models_agreeing: 2,
198            total_models: 3,
199            converged: true,
200            rounds_to_convergence: 3,
201            max_rounds: 3,
202            devil_found_serious_objection: false,
203            minority_reports_count: 1,
204        };
205        let pci = calculate_panel_confidence(&inputs);
206        // agreement = (2/3) * 5000 = 3333
207        // speed = ((3 - 3 + 1) / 3) * 3000 = 1/3 * 3000 = 1000
208        // advocate = 2000
209        // total = 6333
210        assert_eq!(pci, 6333);
211    }
212
213    // 6. test_panel_confidence_devil_found_issue
214    #[test]
215    fn test_panel_confidence_devil_found_issue() {
216        let inputs = PanelConfidenceInputs {
217            models_agreeing: 3,
218            total_models: 3,
219            converged: true,
220            rounds_to_convergence: 1,
221            max_rounds: 3,
222            devil_found_serious_objection: true,
223            minority_reports_count: 0,
224        };
225        let pci = calculate_panel_confidence(&inputs);
226        // advocate = 0
227        assert_eq!(pci, 8000); // 5000 + 3000 + 0
228    }
229
230    // 7. test_minority_report_triggered
231    #[test]
232    fn test_minority_report_triggered() {
233        let pos = ModelPosition {
234            model_id: "m1".into(),
235            round: 1,
236            position_hash: exo_core::types::Hash256::ZERO,
237            position_text: "claim3".into(),
238            key_claims: vec!["claim3".into()],
239            confidence_bps: 8000,
240            submitted_at: Timestamp::new(1, 0),
241            revealed_at: None,
242        };
243        let consensus_claims = vec!["claim1".into(), "claim2".into()];
244        // overlap is 0/2. threshold is 5000.
245        let triggered = report::is_minority_report(&pos, &consensus_claims, 5000);
246        assert!(triggered);
247    }
248
249    // 8. test_minority_report_not_triggered
250    #[test]
251    fn test_minority_report_not_triggered() {
252        let pos = ModelPosition {
253            model_id: "m1".into(),
254            round: 1,
255            position_hash: exo_core::types::Hash256::ZERO,
256            position_text: "claim1, claim2".into(),
257            key_claims: vec!["claim1".into(), "claim2".into()],
258            confidence_bps: 8000,
259            submitted_at: Timestamp::new(1, 0),
260            revealed_at: None,
261        };
262        let consensus_claims = vec!["claim1".into(), "claim2".into()];
263        let triggered = report::is_minority_report(&pos, &consensus_claims, 5000);
264        assert!(!triggered);
265    }
266
267    // 9. test_round_hash_deterministic
268    #[test]
269    fn test_round_hash_deterministic() {
270        let round = DeliberationRound {
271            round_number: 1,
272            question: "Q".into(),
273            positions: BTreeMap::new(),
274            synthesis: None,
275            convergence_score_bps: 10000,
276            devil_advocate_review: None,
277            round_hash: exo_core::types::Hash256::ZERO,
278        };
279        let h1 = round.compute_hash().expect("round hash");
280        let h2 = round.compute_hash().expect("round hash");
281        assert_eq!(h1, h2);
282    }
283
284    #[test]
285    fn structured_response_commitment_binds_claims_and_confidence() {
286        let original = response("same prose", &["claim-a", "claim-b"]);
287        let same = response("same prose", &["claim-a", "claim-b"]);
288        let changed_claims = response("same prose", &["claim-a", "claim-c"]);
289        let mut changed_confidence = response("same prose", &["claim-a", "claim-b"]);
290        changed_confidence.confidence_bps = 7000;
291
292        let original_hash = commit_response(&original).expect("structured response hash");
293
294        assert_eq!(
295            original_hash,
296            commit_response(&same).expect("same structured response hash")
297        );
298        assert_ne!(
299            original_hash,
300            commit_response(&changed_claims).expect("changed claims hash")
301        );
302        assert_ne!(
303            original_hash,
304            commit_response(&changed_confidence).expect("changed confidence hash")
305        );
306        assert!(
307            verify_response_commitment(&original, &original_hash)
308                .expect("verify structured commitment")
309        );
310        assert!(
311            !verify_response_commitment(&changed_claims, &original_hash)
312                .expect("reject changed structured claims")
313        );
314    }
315
316    // 10. test_result_hash_deterministic
317    #[test]
318    fn test_result_hash_deterministic() {
319        let result = DeliberationResult {
320            session_id: "s1".into(),
321            question: "Q".into(),
322            rounds: vec![],
323            final_consensus: "C".into(),
324            minority_reports: vec![],
325            panel_confidence_index_bps: 8000,
326            rounds_to_convergence: 1,
327            devil_advocate_summary: None,
328            deliberation_hash: exo_core::types::Hash256::ZERO,
329            completed_at: Timestamp::new(1, 0),
330        };
331        let h1 = result.compute_hash().expect("result hash");
332        let h2 = result.compute_hash().expect("result hash");
333        assert_eq!(h1, h2);
334    }
335
336    // 11. test_result_hash_changes
337    #[test]
338    fn test_result_hash_changes() {
339        let mut result = DeliberationResult {
340            session_id: "s1".into(),
341            question: "Q".into(),
342            rounds: vec![],
343            final_consensus: "C".into(),
344            minority_reports: vec![],
345            panel_confidence_index_bps: 8000,
346            rounds_to_convergence: 1,
347            devil_advocate_summary: None,
348            deliberation_hash: exo_core::types::Hash256::ZERO,
349            completed_at: Timestamp::new(1, 0),
350        };
351        let h1 = result.compute_hash().expect("result hash");
352        result.rounds_to_convergence = 2;
353        let h2 = result.compute_hash().expect("result hash");
354        assert_ne!(h1, h2);
355    }
356
357    // 12. test_deterministic_session_single_round
358    #[test]
359    fn test_deterministic_session_single_round() {
360        let panel = Panel::default_panel(DecisionClass::Routine);
361        let provider = routine_response_provider("A, B, C", &["a", "b", "c"]);
362
363        let mut session =
364            DeliberationSession::new("test".into(), panel, "What is X?".into(), provider);
365        let round = session.execute_round(round_timing(1)).unwrap();
366        assert_eq!(round.round_number, 1);
367        assert_eq!(round.positions.len(), 3);
368
369        let result = session.finalize(finalization_timing()).unwrap();
370        assert_eq!(result.rounds.len(), 1);
371    }
372
373    // 13. test_deterministic_session_converges
374    #[test]
375    fn test_deterministic_session_converges() {
376        let panel = Panel::default_panel(DecisionClass::Operational);
377        let provider = operational_response_provider("identical claim", &["identical claim"]);
378
379        let mut session =
380            DeliberationSession::new("test".into(), panel, "What is X?".into(), provider);
381        let round = session.execute_round(round_timing(1)).unwrap();
382
383        // Since all give "identical claim", convergence should be 10000
384        assert_eq!(round.convergence_score_bps, 10000);
385        assert!(session.is_converged());
386
387        let result = session.finalize(finalization_timing()).unwrap();
388        assert_eq!(result.rounds_to_convergence, 1);
389    }
390
391    // 14. test_default_panel_by_class
392    #[test]
393    fn test_default_panel_by_class() {
394        let p_routine = Panel::default_panel(DecisionClass::Routine);
395        assert_eq!(p_routine.max_rounds, 1);
396        assert!(p_routine.devil_advocate_model.is_none());
397
398        let p_const = Panel::default_panel(DecisionClass::Constitutional);
399        assert_eq!(p_const.max_rounds, 4);
400        assert!(p_const.devil_advocate_model.is_some());
401        assert_eq!(p_const.models.len(), 5);
402    }
403
404    #[test]
405    fn session_uses_caller_supplied_hlc_inputs() {
406        let panel = Panel::default_panel(DecisionClass::Routine);
407        let responses = routine_panel_responses("A, B, C", &["a", "b", "c"]);
408        let provider = DeterministicResponseProvider::with_positions(responses);
409        let submitted_at = Timestamp::new(42_000, 7);
410        let revealed_at = Timestamp::new(42_000, 8);
411        let completed_at = Timestamp::new(42_001, 0);
412        let mut session =
413            DeliberationSession::new("test".into(), panel, "What is X?".into(), provider);
414
415        let round = session
416            .execute_round(RoundExecutionTiming {
417                submitted_at,
418                revealed_at,
419            })
420            .expect("round executes with caller-supplied timing");
421        for position in round.positions.values() {
422            assert_eq!(position.submitted_at, submitted_at);
423            assert_eq!(position.revealed_at, Some(revealed_at));
424        }
425
426        let result = session
427            .finalize(FinalizationTiming { completed_at })
428            .expect("finalizes with caller-supplied timing");
429        assert_eq!(result.completed_at, completed_at);
430    }
431
432    #[test]
433    fn missing_deterministic_response_is_rejected_without_placeholder_text() {
434        let panel = Panel::default_panel(DecisionClass::Routine);
435        let mut responses = routine_panel_responses("A, B, C", &["a", "b", "c"]);
436        responses.remove("gpt-4o-mini");
437        let provider = DeterministicResponseProvider::with_positions(responses);
438        let mut session =
439            DeliberationSession::new("test".into(), panel, "What is X?".into(), provider);
440
441        let err = session
442            .execute_round(RoundExecutionTiming {
443                submitted_at: Timestamp::new(50_000, 0),
444                revealed_at: Timestamp::new(50_000, 1),
445            })
446            .expect_err("missing model response must fail closed");
447
448        match err {
449            ConsensusError::ProviderError(message) => {
450                assert!(message.contains("gpt-4o-mini"));
451                assert!(!message.contains("Mocked response"));
452            }
453            other => panic!("expected ProviderError, got {other:?}"),
454        }
455    }
456
457    #[test]
458    fn round_hash_is_canonical_cbor_with_domain_tag() {
459        let round = sample_round();
460        #[derive(Serialize)]
461        struct ExpectedRoundHashPayload<'a> {
462            domain: &'static str,
463            schema_version: &'static str,
464            round_number: u32,
465            question: &'a str,
466            positions: &'a BTreeMap<String, ModelPosition>,
467            synthesis: &'a Option<String>,
468            convergence_score_bps: u64,
469            devil_advocate_review: &'a Option<DevilAdvocateReview>,
470        }
471        let expected = exo_core::hash::hash_structured(&ExpectedRoundHashPayload {
472            domain: "exo.consensus.deliberation_round.v1",
473            schema_version: "1",
474            round_number: round.round_number,
475            question: &round.question,
476            positions: &round.positions,
477            synthesis: &round.synthesis,
478            convergence_score_bps: round.convergence_score_bps,
479            devil_advocate_review: &round.devil_advocate_review,
480        })
481        .expect("expected CBOR hash");
482
483        assert_eq!(round.compute_hash().expect("round hash"), expected);
484    }
485
486    #[test]
487    fn result_hash_is_canonical_cbor_with_domain_tag_and_completion_time() {
488        let result = sample_result(Timestamp::new(100_100, 0));
489        #[derive(Serialize)]
490        struct ExpectedResultHashPayload<'a> {
491            domain: &'static str,
492            schema_version: &'static str,
493            session_id: &'a str,
494            question: &'a str,
495            rounds: &'a [DeliberationRound],
496            final_consensus: &'a str,
497            minority_reports: &'a [MinorityReport],
498            panel_confidence_index_bps: u64,
499            rounds_to_convergence: u32,
500            devil_advocate_summary: &'a Option<String>,
501            completed_at: Timestamp,
502        }
503        let expected = exo_core::hash::hash_structured(&ExpectedResultHashPayload {
504            domain: "exo.consensus.deliberation_result.v1",
505            schema_version: "1",
506            session_id: &result.session_id,
507            question: &result.question,
508            rounds: &result.rounds,
509            final_consensus: &result.final_consensus,
510            minority_reports: &result.minority_reports,
511            panel_confidence_index_bps: result.panel_confidence_index_bps,
512            rounds_to_convergence: result.rounds_to_convergence,
513            devil_advocate_summary: &result.devil_advocate_summary,
514            completed_at: result.completed_at,
515        })
516        .expect("expected CBOR hash");
517
518        assert_eq!(result.compute_hash().expect("result hash"), expected);
519
520        let changed_completion_time = sample_result(Timestamp::new(100_101, 0));
521        assert_ne!(
522            result.compute_hash().expect("original hash"),
523            changed_completion_time
524                .compute_hash()
525                .expect("changed hash")
526        );
527    }
528
529    #[test]
530    fn production_session_source_has_no_system_time_or_mock_boundary() {
531        let source = production_source("src/session.rs");
532        let forbidden_timestamp = ["Timestamp::", "now_utc()"].concat();
533        assert!(
534            !source.contains(&forbidden_timestamp),
535            "production session code must not synthesize wall-clock timestamps"
536        );
537        assert!(
538            !source.contains("MockLlmClient") && !source.contains("llm_client"),
539            "production session boundary must not be wired through a mock LLM client"
540        );
541    }
542
543    #[test]
544    fn production_session_source_has_no_raw_text_consensus_heuristics() {
545        let source = production_source("src/session.rs");
546        assert!(
547            !source.contains(".split([',', '\\n', ';'])"),
548            "production session code must not derive structured claims by splitting raw prose"
549        );
550        assert!(
551            !source.contains("is_serious_challenge"),
552            "production session code must not derive serious objections from keyword heuristics"
553        );
554    }
555
556    #[test]
557    fn production_session_source_has_no_silent_default_final_consensus() {
558        let source = production_source("src/session.rs");
559        assert!(
560            !source.contains("unwrap_or_default"),
561            "production session finalization must fail closed instead of defaulting missing synthesis"
562        );
563    }
564
565    #[test]
566    fn production_hashing_source_has_no_json_or_silent_default_fallback() {
567        for file in ["src/round.rs", "src/record.rs"] {
568            let source = production_source(file);
569            assert!(
570                !source.contains("serde_json::to_string"),
571                "{file} must hash canonical CBOR, not JSON"
572            );
573            assert!(
574                !source.contains("unwrap_or_default"),
575                "{file} must not hide hash serialization failures"
576            );
577        }
578    }
579
580    fn routine_panel_responses(
581        response_text: &str,
582        claims: &[&str],
583    ) -> BTreeMap<String, ModelDeliberationResponse> {
584        BTreeMap::from([
585            (
586                "claude-3-haiku".to_string(),
587                response(response_text, claims),
588            ),
589            ("gpt-4o-mini".to_string(), response(response_text, claims)),
590            (
591                "gemini-1.5-flash".to_string(),
592                response(response_text, claims),
593            ),
594        ])
595    }
596
597    fn sample_round() -> DeliberationRound {
598        let mut positions = BTreeMap::new();
599        let position_text = "A, B, C".to_string();
600        positions.insert(
601            "claude-3-haiku".to_string(),
602            ModelPosition {
603                model_id: "claude-3-haiku".to_string(),
604                round: 1,
605                position_hash: commit_response(&ModelDeliberationResponse {
606                    position_text: position_text.clone(),
607                    key_claims: vec!["a".to_string(), "b".to_string(), "c".to_string()],
608                    confidence_bps: 8000,
609                })
610                .expect("structured commitment"),
611                position_text,
612                key_claims: vec!["a".to_string(), "b".to_string(), "c".to_string()],
613                confidence_bps: 8000,
614                submitted_at: Timestamp::new(100_000, 0),
615                revealed_at: Some(Timestamp::new(100_000, 1)),
616            },
617        );
618        DeliberationRound {
619            round_number: 1,
620            question: "What is X?".to_string(),
621            positions,
622            synthesis: Some("Structured consensus claims: a; b; c.".to_string()),
623            convergence_score_bps: 10000,
624            devil_advocate_review: None,
625            round_hash: exo_core::types::Hash256::ZERO,
626        }
627    }
628
629    fn sample_result(completed_at: Timestamp) -> DeliberationResult {
630        DeliberationResult {
631            session_id: "test".to_string(),
632            question: "What is X?".to_string(),
633            rounds: vec![sample_round()],
634            final_consensus: "Structured consensus claims: a; b; c.".to_string(),
635            minority_reports: Vec::new(),
636            panel_confidence_index_bps: 10000,
637            rounds_to_convergence: 1,
638            devil_advocate_summary: None,
639            deliberation_hash: exo_core::types::Hash256::ZERO,
640            completed_at,
641        }
642    }
643
644    fn production_source(path: &str) -> String {
645        let full_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join(path);
646        let source = std::fs::read_to_string(&full_path).unwrap_or_else(|e| {
647            panic!(
648                "failed to read production source {}: {e}",
649                full_path.display()
650            )
651        });
652        source
653            .split("#[cfg(test)]")
654            .next()
655            .expect("source split must have production section")
656            .to_string()
657    }
658}