exo_consensus/
session.rs

1// Copyright 2026 Exochain Foundation
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at:
6//
7//     https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// SPDX-License-Identifier: Apache-2.0
16
17use std::collections::BTreeMap;
18
19use exo_core::types::{Hash256, Timestamp};
20
21use crate::{
22    commitment::{commit_response, verify_response_commitment},
23    error::{ConsensusError, Result},
24    panel::{ModelRole, Panel},
25    record::DeliberationResult,
26    report::{MinorityReport, is_minority_report},
27    round::{DeliberationRound, DevilAdvocateReview, ModelDeliberationResponse, ModelPosition},
28    scoring::{
29        PanelConfidenceInputs, calculate_convergence, calculate_panel_confidence,
30        canonical_claim_set, consensus_claims_at_threshold,
31    },
32};
33
34#[derive(Debug, Clone)]
35pub struct DeterministicResponseProvider {
36    positions: BTreeMap<String, ModelDeliberationResponse>,
37    devil_advocate_reviews: BTreeMap<String, DevilAdvocateReview>,
38}
39
40impl DeterministicResponseProvider {
41    pub fn new(
42        positions: BTreeMap<String, ModelDeliberationResponse>,
43        devil_advocate_reviews: BTreeMap<String, DevilAdvocateReview>,
44    ) -> Self {
45        Self {
46            positions,
47            devil_advocate_reviews,
48        }
49    }
50
51    pub fn with_positions(positions: BTreeMap<String, ModelDeliberationResponse>) -> Self {
52        Self::new(positions, BTreeMap::new())
53    }
54
55    fn position_for(&self, model_id: &str) -> Result<ModelDeliberationResponse> {
56        self.positions.get(model_id).cloned().ok_or_else(|| {
57            ConsensusError::ProviderError(format!(
58                "missing structured deterministic response for model {model_id}"
59            ))
60        })
61    }
62
63    fn devil_advocate_review_for(&self, model_id: &str) -> Result<DevilAdvocateReview> {
64        self.devil_advocate_reviews
65            .get(model_id)
66            .cloned()
67            .ok_or_else(|| {
68                ConsensusError::ProviderError(format!(
69                    "missing structured devil's advocate review for model {model_id}"
70                ))
71            })
72    }
73}
74
75#[derive(Debug, Clone, Copy, PartialEq, Eq)]
76pub struct RoundExecutionTiming {
77    pub submitted_at: Timestamp,
78    pub revealed_at: Timestamp,
79}
80
81impl RoundExecutionTiming {
82    fn validate(&self) -> Result<()> {
83        if self.submitted_at == Timestamp::ZERO {
84            return Err(ConsensusError::StateError(
85                "round submitted_at must be caller-supplied non-zero HLC".into(),
86            ));
87        }
88        if self.revealed_at < self.submitted_at {
89            return Err(ConsensusError::StateError(
90                "round revealed_at must not precede submitted_at".into(),
91            ));
92        }
93        Ok(())
94    }
95}
96
97#[derive(Debug, Clone, Copy, PartialEq, Eq)]
98pub struct FinalizationTiming {
99    pub completed_at: Timestamp,
100}
101
102impl FinalizationTiming {
103    fn validate(&self) -> Result<()> {
104        if self.completed_at == Timestamp::ZERO {
105            return Err(ConsensusError::StateError(
106                "finalization completed_at must be caller-supplied non-zero HLC".into(),
107            ));
108        }
109        Ok(())
110    }
111}
112
113pub struct DeliberationSession {
114    pub session_id: String,
115    pub panel: Panel,
116    pub question: String,
117    pub current_round: u32,
118    pub rounds: Vec<DeliberationRound>,
119    pub response_provider: DeterministicResponseProvider,
120}
121
122impl DeliberationSession {
123    pub fn new(
124        session_id: String,
125        panel: Panel,
126        question: String,
127        response_provider: DeterministicResponseProvider,
128    ) -> Self {
129        Self {
130            session_id,
131            panel,
132            question,
133            current_round: 1,
134            rounds: Vec::new(),
135            response_provider,
136        }
137    }
138
139    pub fn execute_round(&mut self, timing: RoundExecutionTiming) -> Result<DeliberationRound> {
140        if self.current_round > self.panel.max_rounds {
141            return Err(ConsensusError::RoundLimitExceeded);
142        }
143        timing.validate()?;
144        let next_round = self
145            .current_round
146            .checked_add(1)
147            .ok_or(ConsensusError::RoundLimitExceeded)?;
148
149        let mut positions = BTreeMap::new();
150
151        // 1. Commitment Phase
152        let mut commitments = BTreeMap::new();
153        for model in &self.panel.models {
154            if model.role == ModelRole::Panelist {
155                let response = self.response_provider.position_for(&model.model_id)?;
156                let response = validate_model_response(&model.model_id, response)?;
157                let position_hash = commit_response(&response)?;
158                commitments.insert(model.model_id.clone(), (response, position_hash));
159            }
160        }
161
162        // 2. Reveal & Verify Phase
163        for (model_id, (response, position_hash)) in commitments {
164            if !verify_response_commitment(&response, &position_hash)? {
165                return Err(ConsensusError::CommitmentMismatch { model_id });
166            }
167
168            let pos = ModelPosition {
169                model_id: model_id.clone(),
170                round: self.current_round,
171                position_hash,
172                position_text: response.position_text,
173                key_claims: response.key_claims,
174                confidence_bps: response.confidence_bps,
175                submitted_at: timing.submitted_at,
176                revealed_at: Some(timing.revealed_at),
177            };
178
179            positions.insert(model_id, pos);
180        }
181
182        // 3. Scoring
183        let claim_sets = position_claim_sets(&positions);
184        let convergence_score_bps = calculate_convergence(&claim_sets);
185        let consensus_claims =
186            consensus_claims_at_threshold(&claim_sets, self.panel.convergence_threshold_bps);
187
188        // 4. Synthesis
189        let synthesis_text = consensus_summary(&consensus_claims);
190
191        // 5. Devil's Advocate (only if converging well or on final round)
192        let mut devil_advocate_review = None;
193        if convergence_score_bps >= self.panel.convergence_threshold_bps
194            || self.current_round == self.panel.max_rounds
195        {
196            if let Some(da_id) = &self.panel.devil_advocate_model {
197                let review = self.response_provider.devil_advocate_review_for(da_id)?;
198                devil_advocate_review = Some(validate_devil_advocate_review(da_id, review)?);
199            }
200        }
201
202        let mut round = DeliberationRound {
203            round_number: self.current_round,
204            question: self.question.clone(),
205            positions,
206            synthesis: Some(synthesis_text),
207            convergence_score_bps,
208            devil_advocate_review,
209            round_hash: Hash256::ZERO,
210        };
211
212        round.round_hash = round.compute_hash()?;
213
214        self.rounds.push(round.clone());
215        self.current_round = next_round;
216
217        Ok(round)
218    }
219
220    pub fn is_converged(&self) -> bool {
221        if let Some(last) = self.rounds.last() {
222            return last.convergence_score_bps >= self.panel.convergence_threshold_bps;
223        }
224        false
225    }
226
227    /// Finalize the session, consuming it so the full round history moves into
228    /// the result without duplicating consensus evidence in memory.
229    pub fn finalize(self, timing: FinalizationTiming) -> Result<DeliberationResult> {
230        timing.validate()?;
231
232        let DeliberationSession {
233            session_id,
234            panel,
235            question,
236            rounds,
237            ..
238        } = self;
239
240        if rounds.is_empty() {
241            return Err(ConsensusError::StateError(
242                "Cannot finalize without any rounds".into(),
243            ));
244        }
245
246        let Some(last_round) = rounds.last() else {
247            return Err(ConsensusError::StateError(
248                "Rounds exist but last() failed".into(),
249            ));
250        };
251        let final_consensus = last_round
252            .synthesis
253            .as_deref()
254            .map(str::trim)
255            .filter(|synthesis| !synthesis.is_empty())
256            .ok_or_else(|| {
257                ConsensusError::StateError(
258                    "Cannot finalize round with missing synthesis evidence".into(),
259                )
260            })?
261            .to_string();
262        let mut minority_reports = Vec::new();
263
264        let claim_sets = position_claim_sets(&last_round.positions);
265        let consensus_claims =
266            consensus_claims_at_threshold(&claim_sets, panel.convergence_threshold_bps);
267        let no_consensus_claims = consensus_claims.is_empty();
268
269        for pos in last_round.positions.values() {
270            if no_consensus_claims
271                || is_minority_report(pos, &consensus_claims, panel.convergence_threshold_bps)
272            {
273                let reasons = if no_consensus_claims {
274                    vec!["No structured consensus claims met threshold.".to_string()]
275                } else {
276                    let missing_claims = missing_consensus_claims(pos, &consensus_claims);
277                    vec![format!(
278                        "Missing structured consensus claims: {}",
279                        missing_claims.join(", ")
280                    )]
281                };
282                minority_reports.push(MinorityReport {
283                    model_id: pos.model_id.clone(),
284                    round: pos.round,
285                    dissenting_position: pos.position_text.clone(),
286                    reasons,
287                    divergence_score_bps: 10_000u64
288                        .saturating_sub(last_round.convergence_score_bps),
289                });
290            }
291        }
292
293        let mut da_summary = None;
294        let mut serious_objection = false;
295        if let Some(review) = &last_round.devil_advocate_review {
296            da_summary = Some(review.review_text.clone());
297            serious_objection = review.serious_objection;
298        }
299
300        let panelists_count = usize_to_u32(
301            "panelists_count",
302            panel
303                .models
304                .iter()
305                .filter(|m| m.role == ModelRole::Panelist)
306                .count(),
307        )?;
308        let minority_reports_count =
309            usize_to_u32("minority_reports_count", minority_reports.len())?;
310        let rounds_to_convergence = usize_to_u32("rounds_to_convergence", rounds.len())?;
311        let converged = last_round.convergence_score_bps >= panel.convergence_threshold_bps;
312        let models_agreeing = if no_consensus_claims {
313            0
314        } else {
315            panelists_count.saturating_sub(minority_reports_count)
316        };
317
318        let inputs = PanelConfidenceInputs {
319            models_agreeing,
320            total_models: panelists_count,
321            converged,
322            rounds_to_convergence,
323            max_rounds: panel.max_rounds,
324            devil_found_serious_objection: serious_objection,
325            minority_reports_count,
326        };
327
328        let pci = calculate_panel_confidence(&inputs);
329
330        let mut result = DeliberationResult {
331            session_id,
332            question,
333            rounds,
334            final_consensus,
335            minority_reports,
336            panel_confidence_index_bps: pci,
337            rounds_to_convergence,
338            devil_advocate_summary: da_summary,
339            deliberation_hash: Hash256::ZERO,
340            completed_at: timing.completed_at,
341        };
342
343        result.deliberation_hash = result.compute_hash()?;
344
345        Ok(result)
346    }
347}
348
349fn validate_model_response(
350    model_id: &str,
351    response: ModelDeliberationResponse,
352) -> Result<ModelDeliberationResponse> {
353    let position_text = response.position_text.trim().to_string();
354    if position_text.is_empty() {
355        return Err(ConsensusError::ProviderError(format!(
356            "structured deterministic response for model {model_id} has empty position_text"
357        )));
358    }
359    if response.confidence_bps > 10000 {
360        return Err(ConsensusError::ProviderError(format!(
361            "structured deterministic response for model {model_id} has confidence_bps above 10000"
362        )));
363    }
364    let key_claims = canonical_claim_set(&response.key_claims);
365    if key_claims.is_empty() {
366        return Err(ConsensusError::ProviderError(format!(
367            "structured deterministic response for model {model_id} must include explicit key_claims"
368        )));
369    }
370
371    Ok(ModelDeliberationResponse {
372        position_text,
373        key_claims,
374        confidence_bps: response.confidence_bps,
375    })
376}
377
378fn validate_devil_advocate_review(
379    model_id: &str,
380    review: DevilAdvocateReview,
381) -> Result<DevilAdvocateReview> {
382    let review_text = review.review_text.trim().to_string();
383    if review_text.is_empty() {
384        return Err(ConsensusError::ProviderError(format!(
385            "structured devil's advocate review for model {model_id} has empty review_text"
386        )));
387    }
388
389    let reasons = canonical_claim_set(&review.reasons);
390    if review.serious_objection && reasons.is_empty() {
391        return Err(ConsensusError::ProviderError(format!(
392            "structured devil's advocate review for model {model_id} marks serious_objection without reasons"
393        )));
394    }
395
396    Ok(DevilAdvocateReview {
397        review_text,
398        serious_objection: review.serious_objection,
399        reasons,
400    })
401}
402
403fn position_claim_sets(positions: &BTreeMap<String, ModelPosition>) -> Vec<Vec<String>> {
404    positions
405        .values()
406        .map(|position| position.key_claims.clone())
407        .collect()
408}
409
/// Renders the consensus claims as a single summary sentence.
///
/// With no claims the fixed "no consensus" sentence is returned; otherwise
/// the claims are joined with `"; "` and terminated with a period.
fn consensus_summary(consensus_claims: &[String]) -> String {
    match consensus_claims {
        [] => "No structured consensus claims met threshold.".to_string(),
        claims => {
            let joined = claims.join("; ");
            format!("Structured consensus claims: {}.", joined)
        }
    }
}
420
421fn missing_consensus_claims(position: &ModelPosition, consensus_claims: &[String]) -> Vec<String> {
422    let position_claims = canonical_claim_set(&position.key_claims);
423    consensus_claims
424        .iter()
425        .filter(|claim| !position_claims.contains(claim))
426        .cloned()
427        .collect()
428}
429
430fn usize_to_u32(field: &'static str, value: usize) -> Result<u32> {
431    u32::try_from(value).map_err(|_| {
432        ConsensusError::StateError(format!(
433            "finalization count {field} value {value} exceeds u32::MAX"
434        ))
435    })
436}
437
438#[cfg(test)]
439#[allow(clippy::unwrap_used, clippy::expect_used)]
440mod tests {
441    use decision_forum::decision_object::DecisionClass;
442
443    use super::*;
444
445    fn response(text: &str, claims: &[&str]) -> ModelDeliberationResponse {
446        ModelDeliberationResponse {
447            position_text: text.to_string(),
448            key_claims: claims.iter().map(|claim| (*claim).to_string()).collect(),
449            confidence_bps: 8000,
450        }
451    }
452
453    fn routine_responses(
454        response_text: &str,
455        claims: &[&str],
456    ) -> BTreeMap<String, ModelDeliberationResponse> {
457        BTreeMap::from([
458            (
459                "claude-3-haiku".to_string(),
460                response(response_text, claims),
461            ),
462            ("gpt-4o-mini".to_string(), response(response_text, claims)),
463            (
464                "gemini-1.5-flash".to_string(),
465                response(response_text, claims),
466            ),
467        ])
468    }
469
470    fn neutral_review() -> DevilAdvocateReview {
471        DevilAdvocateReview {
472            review_text: "No threshold objection found.".to_string(),
473            serious_objection: false,
474            reasons: Vec::new(),
475        }
476    }
477
478    fn timing(round: u64) -> RoundExecutionTiming {
479        RoundExecutionTiming {
480            submitted_at: Timestamp::new(round * 10, 0),
481            revealed_at: Timestamp::new(round * 10, 1),
482        }
483    }
484
485    fn finalization_timing() -> FinalizationTiming {
486        FinalizationTiming {
487            completed_at: Timestamp::new(1000, 0),
488        }
489    }
490
    // Covers the round-limit guard: execute_round returns RoundLimitExceeded when current_round > max_rounds.
492    #[test]
493    fn execute_round_returns_round_limit_exceeded_when_current_round_exceeds_max() {
494        let panel = Panel::default_panel(DecisionClass::Routine); // max_rounds = 1
495        let provider =
496            DeterministicResponseProvider::with_positions(routine_responses("A, B", &["a", "b"]));
497        let mut session = DeliberationSession::new("s".into(), panel, "Q?".into(), provider);
498        // First round succeeds; second should be rejected because current_round (2) > max_rounds (1).
499        let first = session.execute_round(timing(1)).expect("first round ok");
500        assert_eq!(first.round_number, 1);
501        assert_eq!(session.current_round, 2);
502        let err = session
503            .execute_round(timing(2))
504            .expect_err("must exceed limit");
505        assert!(matches!(err, ConsensusError::RoundLimitExceeded));
506        // The failed call must not push a round or advance the counter.
507        assert_eq!(session.rounds.len(), 1);
508        assert_eq!(session.current_round, 2);
509    }
510
511    #[test]
512    fn execute_round_rejects_u32_max_round_counter_without_overflowing() {
513        let mut panel = Panel::default_panel(DecisionClass::Routine);
514        panel.max_rounds = u32::MAX;
515        let provider =
516            DeterministicResponseProvider::with_positions(routine_responses("A, B", &["a", "b"]));
517        let mut session = DeliberationSession::new("s".into(), panel, "Q?".into(), provider);
518        session.current_round = u32::MAX;
519
520        let err = session
521            .execute_round(timing(1))
522            .expect_err("u32::MAX round counter must fail closed");
523
524        assert!(matches!(err, ConsensusError::RoundLimitExceeded));
525        assert!(session.rounds.is_empty());
526        assert_eq!(session.current_round, u32::MAX);
527    }
528
    // Covers is_converged returning false when the last round's score is below threshold.
530    #[test]
531    fn is_converged_false_when_last_round_below_threshold() {
532        // Operational panel: threshold 7500, 3 panelists. Distinct responses => convergence 0.
533        let panel = Panel::default_panel(DecisionClass::Operational);
534        let mut responses = BTreeMap::new();
535        responses.insert(
536            "claude-3-5-sonnet".into(),
537            response("alpha position", &["alpha"]),
538        );
539        responses.insert("gpt-4o".into(), response("beta position", &["beta"]));
540        responses.insert(
541            "gemini-1.5-pro".into(),
542            response("gamma position", &["gamma"]),
543        );
544        let provider = DeterministicResponseProvider::with_positions(responses);
545        let mut session = DeliberationSession::new("s".into(), panel, "Q?".into(), provider);
546        let round = session.execute_round(timing(1)).unwrap();
547        // Zero overlap across three distinct claims => 0 bps, clearly below the 7500 threshold.
548        assert_eq!(round.convergence_score_bps, 0);
549        assert!(!session.is_converged());
550    }
551
552    #[test]
553    fn finalize_no_consensus_does_not_award_full_panel_confidence() {
554        let panel = Panel::default_panel(DecisionClass::Routine);
555        let responses = BTreeMap::from([
556            (
557                "claude-3-haiku".to_string(),
558                response("alpha position", &["alpha"]),
559            ),
560            (
561                "gpt-4o-mini".to_string(),
562                response("beta position", &["beta"]),
563            ),
564            (
565                "gemini-1.5-flash".to_string(),
566                response("gamma position", &["gamma"]),
567            ),
568        ]);
569        let provider = DeterministicResponseProvider::with_positions(responses);
570        let mut session = DeliberationSession::new("s".into(), panel, "Q?".into(), provider);
571
572        let round = session.execute_round(timing(1)).unwrap();
573        assert_eq!(round.convergence_score_bps, 0);
574        let result = session.finalize(finalization_timing()).unwrap();
575
576        assert_eq!(result.minority_reports.len(), 3);
577        assert!(
578            result.panel_confidence_index_bps <= 2_000,
579            "no-consensus deliberation must not receive high confidence, got {}",
580            result.panel_confidence_index_bps
581        );
582    }
583
584    // Covers is_converged false branch when no rounds have been executed yet.
585    #[test]
586    fn is_converged_false_when_no_rounds() {
587        let panel = Panel::default_panel(DecisionClass::Routine);
588        let provider = DeterministicResponseProvider::with_positions(BTreeMap::new());
589        let session = DeliberationSession::new("s".into(), panel, "Q?".into(), provider);
590        assert!(!session.is_converged());
591    }
592
    // Covers finalize returning StateError when no rounds have been executed.
594    #[test]
595    fn finalize_errors_with_state_error_when_rounds_empty() {
596        let panel = Panel::default_panel(DecisionClass::Routine);
597        let provider = DeterministicResponseProvider::with_positions(BTreeMap::new());
598        let session = DeliberationSession::new("s".into(), panel, "Q?".into(), provider);
599        let err = session
600            .finalize(finalization_timing())
601            .expect_err("must fail when empty");
602        match err {
603            ConsensusError::StateError(msg) => {
604                assert!(
605                    msg.contains("Cannot finalize without any rounds"),
606                    "unexpected state error message: {msg}"
607                );
608            }
609            other => panic!("expected StateError, got {other:?}"),
610        }
611    }
612
613    #[test]
614    fn finalize_rejects_round_without_synthesis_evidence() {
615        let panel = Panel::default_panel(DecisionClass::Routine);
616        let provider = DeterministicResponseProvider::with_positions(routine_responses(
617            "shared position",
618            &["shared claim"],
619        ));
620        let mut session = DeliberationSession::new("s".into(), panel, "Q?".into(), provider);
621        session
622            .execute_round(timing(1))
623            .expect("round executes with synthesis");
624        session.rounds[0].synthesis = None;
625
626        let err = session
627            .finalize(finalization_timing())
628            .expect_err("missing synthesis evidence must fail closed");
629
630        match err {
631            ConsensusError::StateError(message) => {
632                assert!(message.contains("missing synthesis"));
633            }
634            other => panic!("expected StateError, got {other:?}"),
635        }
636    }
637
638    #[test]
639    fn finalize_rejects_blank_synthesis_evidence() {
640        let panel = Panel::default_panel(DecisionClass::Routine);
641        let provider = DeterministicResponseProvider::with_positions(routine_responses(
642            "shared position",
643            &["shared claim"],
644        ));
645        let mut session = DeliberationSession::new("s".into(), panel, "Q?".into(), provider);
646        session
647            .execute_round(timing(1))
648            .expect("round executes with synthesis");
649        session.rounds[0].synthesis = Some("   ".to_string());
650
651        let err = session
652            .finalize(finalization_timing())
653            .expect_err("blank synthesis evidence must fail closed");
654
655        match err {
656            ConsensusError::StateError(message) => {
657                assert!(message.contains("missing synthesis"));
658            }
659            other => panic!("expected StateError, got {other:?}"),
660        }
661    }
662
663    #[test]
664    fn execute_round_rejects_text_only_response_without_structured_claims() {
665        let panel = Panel::default_panel(DecisionClass::Routine);
666        let provider = DeterministicResponseProvider::with_positions(routine_responses(
667            "raw text has commas, but no structured claims",
668            &[],
669        ));
670        let mut session = DeliberationSession::new("s".into(), panel, "Q?".into(), provider);
671
672        let err = session
673            .execute_round(timing(1))
674            .expect_err("text-only model response must fail closed");
675
676        match err {
677            ConsensusError::ProviderError(message) => {
678                assert!(message.contains("explicit key_claims"));
679            }
680            other => panic!("expected ProviderError, got {other:?}"),
681        }
682    }
683
684    #[test]
685    fn execute_round_rejects_out_of_range_model_confidence() {
686        let panel = Panel::default_panel(DecisionClass::Routine);
687        let mut responses = routine_responses("claim text", &["claim"]);
688        let mut invalid = response("claim text", &["claim"]);
689        invalid.confidence_bps = 10001;
690        responses.insert("gpt-4o-mini".to_string(), invalid);
691        let provider = DeterministicResponseProvider::with_positions(responses);
692        let mut session = DeliberationSession::new("s".into(), panel, "Q?".into(), provider);
693
694        let err = session
695            .execute_round(timing(1))
696            .expect_err("confidence above 10000 bps must fail closed");
697
698        match err {
699            ConsensusError::ProviderError(message) => {
700                assert!(message.contains("confidence_bps above 10000"));
701            }
702            other => panic!("expected ProviderError, got {other:?}"),
703        }
704    }
705
    // Covers the final-round clause of the devil's advocate condition: it runs on the final round even when convergence is low.
707    #[test]
708    fn devil_advocate_runs_on_final_round_even_without_convergence() {
709        // Operational panel: max_rounds = 2, threshold 7500, DA = "gpt-4o".
710        let panel = Panel::default_panel(DecisionClass::Operational);
711        let mut responses = BTreeMap::new();
712        // Distinct responses so convergence < threshold in both rounds.
713        responses.insert(
714            "claude-3-5-sonnet".into(),
715            response("alpha position", &["alpha"]),
716        );
717        responses.insert("gpt-4o".into(), response("beta position", &["beta"]));
718        responses.insert(
719            "gemini-1.5-pro".into(),
720            response("gamma position", &["gamma"]),
721        );
722        let provider = DeterministicResponseProvider::new(
723            responses,
724            BTreeMap::from([("gpt-4o".to_string(), neutral_review())]),
725        );
726        let mut session = DeliberationSession::new("s".into(), panel, "Q?".into(), provider);
727
728        // Round 1: not final, convergence below threshold -> DA does NOT run.
729        let r1 = session.execute_round(timing(1)).unwrap();
730        assert!(r1.convergence_score_bps < 7500);
731        assert!(
732            r1.devil_advocate_review.is_none(),
733            "DA should not run when neither converged nor on the final round"
734        );
735
        // Round 2: final round, still below threshold -> DA MUST run via the final-round clause.
737        let r2 = session.execute_round(timing(2)).unwrap();
738        assert_eq!(r2.round_number, 2);
739        assert!(r2.convergence_score_bps < 7500);
740        assert!(
741            r2.devil_advocate_review.is_some(),
742            "DA must trigger on the final round even without convergence"
743        );
744
745        // And finalize must surface that DA summary on the result.
746        let result = session.finalize(finalization_timing()).unwrap();
747        assert!(result.devil_advocate_summary.is_some());
748    }
749
750    #[test]
751    fn devil_advocate_keyword_text_is_not_binding_without_serious_flag() {
752        let panel = Panel::default_panel(DecisionClass::Operational);
753        let positions = BTreeMap::from([
754            (
755                "claude-3-5-sonnet".to_string(),
756                response("shared position", &["shared claim"]),
757            ),
758            (
759                "gpt-4o".to_string(),
760                response("shared position", &["shared claim"]),
761            ),
762            (
763                "gemini-1.5-pro".to_string(),
764                response("shared position", &["shared claim"]),
765            ),
766        ]);
767        let reviews = BTreeMap::from([(
768            "gpt-4o".to_string(),
769            DevilAdvocateReview {
770                review_text: "The prose says serious and fatal but the structured flag is false."
771                    .to_string(),
772                serious_objection: false,
773                reasons: Vec::new(),
774            },
775        )]);
776        let provider = DeterministicResponseProvider::new(positions, reviews);
777        let mut session = DeliberationSession::new("s".into(), panel, "Q?".into(), provider);
778
779        session.execute_round(timing(1)).unwrap();
780        let result = session.finalize(finalization_timing()).unwrap();
781
782        assert_eq!(result.panel_confidence_index_bps, 10000);
783        assert_eq!(
784            result.devil_advocate_summary.as_deref(),
785            Some("The prose says serious and fatal but the structured flag is false.")
786        );
787    }
788
// Checks that finalization reports a minority divergence of 0 (rather than failing
// or wrapping) when the recorded convergence score has been pushed to 10_001 bps.
#[test]
fn finalize_saturates_minority_divergence_when_convergence_exceeds_bps_ceiling() {
    // Two models share a claim; the third model holds a different one.
    let positions: BTreeMap<String, _> = [
        (
            "claude-3-haiku",
            response("shared position", &["shared claim"]),
        ),
        ("gpt-4o-mini", response("also shared", &["shared claim"])),
        (
            "gemini-1.5-flash",
            response("minority position", &["minority claim"]),
        ),
    ]
    .into_iter()
    .map(|(model_id, position)| (model_id.to_string(), position))
    .collect();

    let mut session = DeliberationSession::new(
        "s".into(),
        Panel::default_panel(DecisionClass::Routine),
        "Q?".into(),
        DeterministicResponseProvider::with_positions(positions),
    );

    session.execute_round(timing(1)).unwrap();
    // Overwrite the stored score with a value one past 10_000
    // (presumably the basis-point maximum -- confirm against the scoring module).
    session.rounds[0].convergence_score_bps = 10_001;

    let outcome = session.finalize(finalization_timing()).unwrap();
    let reports = &outcome.minority_reports;

    assert_eq!(reports.len(), 1);
    assert_eq!(reports[0].divergence_score_bps, 0);
}
817
// Source-scan guard over this file's own text: the production section must not
// contain `.unwrap_or(0)` and must contain both `usize_to_u32(...)` call prefixes
// checked below.
#[test]
fn production_finalization_does_not_default_failed_u32_conversions_to_zero() {
    // Everything before the first line that begins with `#[cfg(test)]` is treated
    // as the production section.
    let production = include_str!("session.rs")
        .split("\n#[cfg(test)]")
        .next()
        .expect("production section");

    assert!(
        !production.contains(".unwrap_or(0)"),
        "failed finalization count conversions must fail closed instead of defaulting to zero"
    );

    // (required call prefix, message reported when it is missing)
    let required_helper_calls = [
        (
            "usize_to_u32(\"rounds_to_convergence\"",
            "round count conversion must use the typed finalization count helper",
        ),
        (
            "usize_to_u32(\"minority_reports_count\"",
            "minority report count conversion must use the typed finalization count helper",
        ),
    ];
    for (call_prefix, failure_message) in required_helper_calls {
        assert!(production.contains(call_prefix), "{failure_message}");
    }
}
839
// Source-scan guard over this file's own text: the `finalize` implementation must
// not contain `self.rounds.clone()`, and its signature must take `self` by value.
#[test]
fn production_finalization_moves_rounds_without_cloning_full_history() {
    let production = include_str!("session.rs")
        .split("\n#[cfg(test)]")
        .next()
        .expect("production section");

    // The text following the first `pub fn finalize(`, cut off at the start of
    // `fn validate_model_response` when that marker is present.
    let after_signature = production.split("pub fn finalize(").nth(1);
    let finalize_body = after_signature
        .and_then(|rest| rest.split("\nfn validate_model_response").next())
        .expect("finalize implementation");

    let clones_round_history = finalize_body.contains("self.rounds.clone()");
    assert!(
        !clones_round_history,
        "DeliberationSession::finalize must not clone the full round history"
    );

    let consumes_session = production.contains("pub fn finalize(self,");
    assert!(
        consumes_session,
        "DeliberationSession::finalize must consume the session so rounds can move into the result"
    );
}
862
// Two scenarios on an Operational panel whose three models fully agree:
//   1. a serious objection that lists a reason yields a panel confidence of 8000 bps;
//   2. a serious objection with no reasons makes `execute_round` fail with a
//      `ProviderError`.
#[test]
fn devil_advocate_serious_objection_requires_reasons_and_penalizes_panel_confidence() {
    // Identical position and claim for every model on the panel.
    let unanimous_positions = || {
        ["claude-3-5-sonnet", "gpt-4o", "gemini-1.5-pro"]
            .into_iter()
            .map(|model_id| {
                (
                    model_id.to_string(),
                    response("shared position", &["shared claim"]),
                )
            })
            .collect::<BTreeMap<_, _>>()
    };
    // A single review keyed by "gpt-4o" with the serious-objection flag set.
    let serious_objection = |review_text: &str, reasons: Vec<String>| {
        BTreeMap::from([(
            "gpt-4o".to_string(),
            DevilAdvocateReview {
                review_text: review_text.to_string(),
                serious_objection: true,
                reasons,
            },
        )])
    };

    // Scenario 1: the objection carries a reason and is accepted.
    let accepted_provider = DeterministicResponseProvider::new(
        unanimous_positions(),
        serious_objection(
            "Structured objection accepted.",
            vec!["missing safety bound".to_string()],
        ),
    );
    let mut accepted_session = DeliberationSession::new(
        "s".into(),
        Panel::default_panel(DecisionClass::Operational),
        "Q?".into(),
        accepted_provider,
    );

    accepted_session.execute_round(timing(1)).unwrap();
    let outcome = accepted_session.finalize(finalization_timing()).unwrap();

    assert_eq!(outcome.panel_confidence_index_bps, 8000);

    // Scenario 2: the objection is flagged serious but lists no reasons.
    let rejected_provider = DeterministicResponseProvider::new(
        unanimous_positions(),
        serious_objection("Structured objection lacks reasons.", Vec::new()),
    );
    let mut rejected_session = DeliberationSession::new(
        "s2".into(),
        Panel::default_panel(DecisionClass::Operational),
        "Q?".into(),
        rejected_provider,
    );

    let rejection = rejected_session
        .execute_round(timing(1))
        .expect_err("serious objection without reasons must fail closed");
    let message = match rejection {
        ConsensusError::ProviderError(message) => message,
        other => panic!("expected ProviderError, got {other:?}"),
    };
    assert!(message.contains("marks serious_objection without reasons"));
}
935
// Exercises the case where no devil's-advocate review is attached to the round
// because the panel has no devil_advocate_model configured.
#[test]
fn devil_advocate_skipped_when_panel_has_no_da_model_even_on_converged_final_round() {
    // The default Routine panel is expected to come without a devil's advocate;
    // the assertion below pins that precondition. (Presumably it also allows only
    // a single round, making round 1 the final one -- confirm in the panel module.)
    let panel = Panel::default_panel(DecisionClass::Routine);
    assert!(panel.devil_advocate_model.is_none());

    let identical_responses = routine_responses("same claim", &["same claim"]);
    let mut session = DeliberationSession::new(
        "s".into(),
        panel,
        "Q?".into(),
        DeterministicResponseProvider::with_positions(identical_responses),
    );

    let round = session.execute_round(timing(1)).unwrap();

    // Every model gave the same answer, so the round is fully converged (10000 bps),
    // yet no review is recorded since there is no devil's-advocate model to ask.
    assert_eq!(round.convergence_score_bps, 10000);
    assert!(round.devil_advocate_review.is_none());
}
953}
exo_consensus/session.rs

exo_consensus/
session.rs