reasonkit/thinktool/
debate.rs

1//! # Multi-Agent Debate Architecture
2//!
3//! Implements adversarial debate between multiple agents for improved factuality.
4//! Based on ICML 2024 research showing +20% factuality improvement.
5//!
6//! ## Scientific Foundation
7//!
//! - Du et al. (ICML 2024): "Improving Factuality and Reasoning in Language Models through Multiagent Debate"
9//! - Irving et al. (2018): "AI Safety via Debate"
10//!
11//! ## Core Concept
12//!
13//! ```text
14//! ┌─────────────────────────────────────────────────────────────────────┐
15//! │                    MULTI-AGENT DEBATE                               │
16//! ├─────────────────────────────────────────────────────────────────────┤
17//! │                                                                     │
18//! │   ADVOCATE ◄─────────────────────────► CRITIC                      │
19//! │   (Pro position)        Rounds        (Con position)               │
20//! │        │                               │                           │
21//! │        └───────────┬───────────────────┘                           │
22//! │                    ▼                                               │
23//! │              SYNTHESIZER                                           │
24//! │        (Weighs arguments, final verdict)                           │
25//! │                    │                                               │
26//! │                    ▼                                               │
27//! │              FINAL OUTPUT                                          │
28//! │        (Balanced, fact-checked conclusion)                         │
29//! │                                                                     │
30//! └─────────────────────────────────────────────────────────────────────┘
31//! ```
32//!
33//! ## Usage
34//!
35//! ```rust,ignore
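//! use reasonkit::thinktool::debate::{AgentRole, Argument, DebateArena, DebateConfig};
//!
//! let mut arena = DebateArena::new(DebateConfig {
//!     rounds: 3,
//!     ..Default::default()
//! });
//! arena.set_proposition("Is nuclear power safe?");
//!
//! // The arena only records and scores arguments; the caller produces them
//! // (for example from model responses built with `DebatePrompts`).
//! arena.add_argument(Argument {
//!     role: AgentRole::Advocate,
//!     content: "Opening argument in favor of the proposition".into(),
//!     round: 0,
//!     claims: vec![],
//!     evidence: vec![],
//!     strength: 0.8,
//!     rebuttals: vec![],
//!     concessions: vec![],
//! });
//!
//! let result = arena.build_result();
//! println!("Verdict: {:?}", result.verdict);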
45//! ```
46
47use serde::{Deserialize, Serialize};
48use std::collections::HashMap;
49
/// Role of an agent in the debate.
///
/// Every role has a matching system prompt, available through
/// `AgentRole::system_prompt`. `DebateArena::synthesize_verdict` compares only
/// the `Advocate` and `Critic` argument strengths when choosing a verdict type;
/// arguments from the other roles are still recorded and shown in the transcript.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AgentRole {
    /// Argues in favor of the proposition
    Advocate,
    /// Argues against the proposition and looks for flaws in it
    Critic,
    /// Weighs the evidence from all sides and synthesizes a verdict
    Synthesizer,
    /// Checks facts and sources behind the claims made
    FactChecker,
    /// Considers alternatives and challenges consensus
    DevilsAdvocate,
}
64
65impl AgentRole {
66    pub fn system_prompt(&self) -> &'static str {
67        match self {
68            AgentRole::Advocate => {
69                "You are the ADVOCATE. Your role is to argue in FAVOR of the proposition.
70Present the strongest possible case. Use evidence, logic, and persuasion.
71Acknowledge weaknesses only to preempt counterarguments.
72Your goal: Make the most compelling case for the position."
73            }
74            AgentRole::Critic => {
75                "You are the CRITIC. Your role is to find FLAWS in the proposition.
76Challenge assumptions. Identify weak evidence. Find logical gaps.
77Present counterarguments and alternative explanations.
78Your goal: Expose weaknesses and potential errors."
79            }
80            AgentRole::Synthesizer => {
81                "You are the SYNTHESIZER. Your role is to weigh all arguments fairly.
82Evaluate the strength of each side's evidence and logic.
83Identify where the truth likely lies. Note remaining uncertainties.
84Your goal: Produce a balanced, well-reasoned verdict."
85            }
86            AgentRole::FactChecker => {
87                "You are the FACT-CHECKER. Your role is to verify claims.
88Check sources. Identify unsupported assertions. Flag misinformation.
89Rate the factual accuracy of each claim (0-100%).
90Your goal: Ensure all claims are grounded in verifiable facts."
91            }
92            AgentRole::DevilsAdvocate => {
93                "You are the DEVIL'S ADVOCATE. Your role is to consider alternatives.
94What if the opposite were true? What are we missing?
95Challenge consensus. Explore edge cases and unlikely scenarios.
96Your goal: Ensure all perspectives have been considered."
97            }
98        }
99    }
100}
101
/// A single argument in the debate.
///
/// Arguments are stored by `DebateArena` in insertion order and are the only
/// input to its transcript, statistics and verdict computations.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Argument {
    /// Which agent made this argument
    pub role: AgentRole,
    /// The argument content (free text, printed verbatim in the transcript)
    pub content: String,
    /// Zero-based round index; the transcript displays it as `round + 1`
    pub round: usize,
    /// Claims made in this argument
    pub claims: Vec<Claim>,
    /// Evidence cited
    pub evidence: Vec<Evidence>,
    /// Strength rating (0.0-1.0); averaged per role to pick the verdict and
    /// shown as a percentage in the transcript
    pub strength: f32,
    /// Rebuttals to previous arguments
    pub rebuttals: Vec<Rebuttal>,
    /// Points conceded to the opponent.
    /// `#[serde(default)]` makes a missing field deserialize as an empty list.
    #[serde(default)]
    pub concessions: Vec<String>,
}
123
/// A single claim asserted inside an `Argument`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Claim {
    /// Text of the claim; copied verbatim into verdict point lists
    pub statement: String,
    /// Confidence in the claim. `DebateArena::synthesize_verdict` treats values
    /// above 0.7 as candidate winning points and values below 0.5 as unresolved.
    /// (presumably on a 0.0-1.0 scale like `Argument::strength` -- TODO confirm)
    pub confidence: f32,
    /// Fact-check outcome. `Some(false)` marks the claim as unresolved in the
    /// verdict. (`None` presumably means "not checked yet" -- TODO confirm)
    pub verified: Option<bool>,
}
130
/// A piece of evidence cited in support of an `Argument`.
///
/// Within this file only the number of evidence items is used (for
/// `DebateStats::evidence_cited`); the fields themselves are not inspected.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Evidence {
    /// What the evidence is or shows
    pub description: String,
    /// Where the evidence comes from, if a source was given
    pub source: Option<String>,
    /// Credibility rating of the evidence
    /// (presumably 0.0-1.0 like the other ratings -- TODO confirm)
    pub credibility: f32,
}
137
/// A counter to a specific claim made earlier in the debate.
///
/// Rebuttals are listed under their argument in the transcript, and their total
/// count is reported as `DebateStats::claims_rebutted`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Rebuttal {
    /// What is being rebutted (quoted in the transcript after "RE:")
    pub target_claim: String,
    /// The counter-argument
    pub counter: String,
    /// Strength of the rebuttal; the transcript prints it as a percentage
    /// (`effectiveness * 100`)
    pub effectiveness: f32,
}
147
/// Configuration for the debate.
///
/// NOTE(review): `DebateArena` stores this as its public `config` field but no
/// code in this file reads the individual settings; they are presumably consumed
/// by whatever orchestrates the agents -- confirm against the caller.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DebateConfig {
    /// Number of debate rounds
    pub rounds: usize,
    /// Roles to include
    pub roles: Vec<AgentRole>,
    /// Whether to include fact-checking
    pub fact_check: bool,
    /// Whether to include devil's advocate
    pub devils_advocate: bool,
    /// Minimum argument strength to continue
    pub min_strength_threshold: f32,
    /// Whether to allow concession of points
    pub allow_concessions: bool,
}
164
165impl Default for DebateConfig {
166    fn default() -> Self {
167        Self {
168            rounds: 3,
169            roles: vec![
170                AgentRole::Advocate,
171                AgentRole::Critic,
172                AgentRole::Synthesizer,
173            ],
174            fact_check: true,
175            devils_advocate: false,
176            min_strength_threshold: 0.3,
177            allow_concessions: true,
178        }
179    }
180}
181
/// Final verdict from the debate.
///
/// Produced by `DebateArena::synthesize_verdict` from the recorded arguments.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DebateVerdict {
    /// Verdict type
    pub verdict_type: VerdictType,
    /// Summary of the conclusion (one human-readable sentence)
    pub summary: String,
    /// Confidence in the verdict; `synthesize_verdict` never reports more than 1.0
    pub confidence: f32,
    /// Key points that won the debate: at most three claim statements with
    /// confidence above 0.7, taken from the winning side
    pub winning_points: Vec<String>,
    /// Unresolved issues: at most three claims that failed verification or
    /// have confidence below 0.5
    pub unresolved: Vec<String>,
    /// Recommendations; `synthesize_verdict` leaves this empty
    pub recommendations: Vec<String>,
}
198
/// Outcome category of a debate.
///
/// `DebateArena::synthesize_verdict` picks a variant by comparing the average
/// strength of the advocate's arguments with that of the critic's.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum VerdictType {
    /// Proposition is supported
    Affirmed,
    /// Proposition is refuted
    Refuted,
    /// Evidence is balanced/mixed
    Balanced,
    /// Cannot determine
    Inconclusive,
    /// True with qualifications
    PartiallyAffirmed,
    /// More investigation needed
    RequiresFurtherInvestigation,
}
214
/// Complete result of a debate.
///
/// A self-contained, serializable snapshot assembled by
/// `DebateArena::build_result`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DebateResult {
    /// The proposition debated (empty string if none was set on the arena)
    pub proposition: String,
    /// All arguments made, in the order they were added
    pub arguments: Vec<Argument>,
    /// The final verdict
    pub verdict: DebateVerdict,
    /// Debate statistics
    pub stats: DebateStats,
    /// Factual claims verified (if fact-checking enabled).
    /// `build_result` initialises this as an empty map.
    pub fact_check_results: HashMap<String, bool>,
}
229
/// Aggregate counters describing a debate, as computed by
/// `DebateArena::compute_stats`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DebateStats {
    /// Number of rounds, as a one-based count
    pub total_rounds: usize,
    /// Number of arguments made by the advocate
    pub advocate_arguments: usize,
    /// Number of arguments made by the critic
    pub critic_arguments: usize,
    /// Total claims across all arguments
    pub claims_made: usize,
    /// Total rebuttals across all arguments (one rebuttal counts as one
    /// rebutted claim; duplicates are not merged)
    pub claims_rebutted: usize,
    /// Total evidence items across all arguments
    pub evidence_cited: usize,
    /// Total concessions across all arguments
    pub concessions_made: usize,
    /// Mean `strength` over all arguments regardless of role; 0.0 when there
    /// are no arguments
    pub avg_argument_strength: f32,
}
241
/// The debate arena where agents debate.
///
/// The arena is a passive recorder: callers set a proposition, push arguments
/// and advance rounds, then ask for a transcript, statistics or a verdict.
pub struct DebateArena {
    /// Settings this arena was created with
    pub config: DebateConfig,
    /// Arguments recorded so far, in insertion order
    arguments: Vec<Argument>,
    /// Proposition under debate; `None` until `set_proposition` is called
    proposition: Option<String>,
    /// Zero-based index of the current round, advanced by `next_round`
    current_round: usize,
}
249
250impl DebateArena {
251    pub fn new(config: DebateConfig) -> Self {
252        Self {
253            config,
254            arguments: Vec::new(),
255            proposition: None,
256            current_round: 0,
257        }
258    }
259
260    pub fn set_proposition(&mut self, proposition: impl Into<String>) {
261        self.proposition = Some(proposition.into());
262        self.arguments.clear();
263        self.current_round = 0;
264    }
265
266    /// Add an argument from an agent
267    pub fn add_argument(&mut self, argument: Argument) {
268        self.arguments.push(argument);
269    }
270
271    /// Get arguments from a specific role
272    pub fn get_arguments_by_role(&self, role: AgentRole) -> Vec<&Argument> {
273        self.arguments.iter().filter(|a| a.role == role).collect()
274    }
275
276    /// Get arguments from a specific round
277    pub fn get_arguments_by_round(&self, round: usize) -> Vec<&Argument> {
278        self.arguments.iter().filter(|a| a.round == round).collect()
279    }
280
281    /// Get the debate transcript
282    pub fn transcript(&self) -> String {
283        let mut output = String::new();
284
285        output.push_str("═══════════════════════════════════════════════════════════════\n");
286        output.push_str("                        DEBATE TRANSCRIPT                       \n");
287        output.push_str("═══════════════════════════════════════════════════════════════\n\n");
288
289        if let Some(ref prop) = self.proposition {
290            output.push_str(&format!("PROPOSITION: {}\n\n", prop));
291        }
292
293        for round in 0..=self.current_round {
294            let round_args = self.get_arguments_by_round(round);
295            if !round_args.is_empty() {
296                output.push_str(&format!(
297                    "━━━ ROUND {} ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n",
298                    round + 1
299                ));
300
301                for arg in round_args {
302                    output.push_str(&format!(
303                        "[{:?}] (Strength: {:.0}%)\n",
304                        arg.role,
305                        arg.strength * 100.0
306                    ));
307                    output.push_str(&format!("{}\n", arg.content));
308
309                    if !arg.rebuttals.is_empty() {
310                        output.push_str("\n  Rebuttals:\n");
311                        for rebuttal in &arg.rebuttals {
312                            output.push_str(&format!(
313                                "    → RE: \"{}\" - {} (effectiveness: {:.0}%)\n",
314                                rebuttal.target_claim,
315                                rebuttal.counter,
316                                rebuttal.effectiveness * 100.0
317                            ));
318                        }
319                    }
320                    output.push('\n');
321                }
322            }
323        }
324
325        output
326    }
327
328    /// Compute debate statistics
329    pub fn compute_stats(&self) -> DebateStats {
330        let advocate_args = self.get_arguments_by_role(AgentRole::Advocate).len();
331        let critic_args = self.get_arguments_by_role(AgentRole::Critic).len();
332
333        let claims_made: usize = self.arguments.iter().map(|a| a.claims.len()).sum();
334        let claims_rebutted: usize = self.arguments.iter().map(|a| a.rebuttals.len()).sum();
335        let evidence_cited: usize = self.arguments.iter().map(|a| a.evidence.len()).sum();
336
337        let avg_strength = if !self.arguments.is_empty() {
338            self.arguments.iter().map(|a| a.strength).sum::<f32>() / self.arguments.len() as f32
339        } else {
340            0.0
341        };
342
343        DebateStats {
344            total_rounds: self.current_round + 1,
345            advocate_arguments: advocate_args,
346            critic_arguments: critic_args,
347            claims_made,
348            claims_rebutted,
349            evidence_cited,
350            concessions_made: self.arguments.iter().map(|a| a.concessions.len()).sum(),
351            avg_argument_strength: avg_strength,
352        }
353    }
354
355    /// Synthesize a verdict from the debate
356    pub fn synthesize_verdict(&self) -> DebateVerdict {
357        let advocate_strength: f32 = self
358            .get_arguments_by_role(AgentRole::Advocate)
359            .iter()
360            .map(|a| a.strength)
361            .sum();
362
363        let critic_strength: f32 = self
364            .get_arguments_by_role(AgentRole::Critic)
365            .iter()
366            .map(|a| a.strength)
367            .sum();
368
369        let advocate_count = self.get_arguments_by_role(AgentRole::Advocate).len() as f32;
370        let critic_count = self.get_arguments_by_role(AgentRole::Critic).len() as f32;
371
372        let avg_advocate = if advocate_count > 0.0 {
373            advocate_strength / advocate_count
374        } else {
375            0.0
376        };
377        let avg_critic = if critic_count > 0.0 {
378            critic_strength / critic_count
379        } else {
380            0.0
381        };
382
383        let (verdict_type, confidence) = if (avg_advocate - avg_critic).abs() < 0.1 {
384            (VerdictType::Balanced, 0.5)
385        } else if avg_advocate > avg_critic + 0.2 {
386            (
387                VerdictType::Affirmed,
388                0.6 + (avg_advocate - avg_critic) * 0.3,
389            )
390        } else if avg_critic > avg_advocate + 0.2 {
391            (
392                VerdictType::Refuted,
393                0.6 + (avg_critic - avg_advocate) * 0.3,
394            )
395        } else if avg_advocate > avg_critic {
396            (
397                VerdictType::PartiallyAffirmed,
398                0.5 + (avg_advocate - avg_critic) * 0.2,
399            )
400        } else if self.arguments.is_empty() {
401            (VerdictType::Inconclusive, 0.0)
402        } else {
403            (VerdictType::RequiresFurtherInvestigation, 0.4)
404        };
405
406        // Extract winning points (strongest claims from winning side)
407        let winning_points = match verdict_type {
408            VerdictType::Affirmed | VerdictType::PartiallyAffirmed => self
409                .get_arguments_by_role(AgentRole::Advocate)
410                .iter()
411                .flat_map(|a| a.claims.iter())
412                .filter(|c| c.confidence > 0.7)
413                .map(|c| c.statement.clone())
414                .take(3)
415                .collect(),
416            VerdictType::Refuted => self
417                .get_arguments_by_role(AgentRole::Critic)
418                .iter()
419                .flat_map(|a| a.claims.iter())
420                .filter(|c| c.confidence > 0.7)
421                .map(|c| c.statement.clone())
422                .take(3)
423                .collect(),
424            _ => Vec::new(),
425        };
426
427        // Collect unresolved points
428        let unresolved: Vec<String> = self
429            .arguments
430            .iter()
431            .flat_map(|a| a.claims.iter())
432            .filter(|c| c.verified == Some(false) || c.confidence < 0.5)
433            .map(|c| c.statement.clone())
434            .take(3)
435            .collect();
436
437        DebateVerdict {
438            verdict_type,
439            summary: format!(
440                "After {} rounds of debate with {} arguments, the proposition is {:?}",
441                self.current_round + 1,
442                self.arguments.len(),
443                verdict_type
444            ),
445            confidence: confidence.min(1.0),
446            winning_points,
447            unresolved,
448            recommendations: vec![],
449        }
450    }
451
452    /// Build the full debate result
453    pub fn build_result(&self) -> DebateResult {
454        DebateResult {
455            proposition: self.proposition.clone().unwrap_or_default(),
456            arguments: self.arguments.clone(),
457            verdict: self.synthesize_verdict(),
458            stats: self.compute_stats(),
459            fact_check_results: HashMap::new(),
460        }
461    }
462
463    /// Reset for a new debate
464    pub fn reset(&mut self) {
465        self.arguments.clear();
466        self.proposition = None;
467        self.current_round = 0;
468    }
469
470    /// Move to next round
471    pub fn next_round(&mut self) {
472        self.current_round += 1;
473    }
474}
475
476impl Default for DebateArena {
477    fn default() -> Self {
478        Self::new(DebateConfig::default())
479    }
480}
481
/// Prompt templates for debate agents.
///
/// All methods are associated functions that interpolate their arguments
/// verbatim into a fixed template and return the finished prompt text.
pub struct DebatePrompts;

impl DebatePrompts {
    /// Opening argument for advocate.
    ///
    /// Asks for a thesis, evidence, reasoning and anticipated counterarguments
    /// in favor of `proposition`, ending with an `OVERALL_STRENGTH` rating.
    pub fn advocate_opening(proposition: &str) -> String {
        format!(
            r#"You are the ADVOCATE in a structured debate.

PROPOSITION: {proposition}

Present your opening argument IN FAVOR of this proposition.

Your argument should include:
1. THESIS: Your main position (1-2 sentences)
2. EVIDENCE: 2-3 key pieces of supporting evidence
3. REASONING: How the evidence supports your thesis
4. ANTICIPATION: Address likely counterarguments

Be persuasive but factual. Cite sources where possible.
Rate your confidence in each claim (0-100%).

Respond in this format:
THESIS: ...
EVIDENCE: 1. ... 2. ... 3. ...
REASONING: ...
ANTICIPATION: ...
OVERALL_STRENGTH: X%"#,
            proposition = proposition
        )
    }

    /// Opening argument for critic.
    ///
    /// `advocate_arg` is the advocate's opening text that the critic responds to.
    pub fn critic_opening(proposition: &str, advocate_arg: &str) -> String {
        format!(
            r#"You are the CRITIC in a structured debate.

PROPOSITION: {proposition}

ADVOCATE'S ARGUMENT:
{advocate_arg}

Present your critique AGAINST this proposition and the advocate's argument.

Your critique should include:
1. WEAKNESSES: Key flaws in the advocate's argument
2. COUNTER-EVIDENCE: Evidence that contradicts the proposition
3. ALTERNATIVE: Better explanations for the evidence
4. CONCLUSION: Why the proposition should be rejected or qualified

Be rigorous but fair. Attack the argument, not the arguer.
Rate your confidence in each counter-claim (0-100%).

Respond in this format:
WEAKNESSES: 1. ... 2. ... 3. ...
COUNTER_EVIDENCE: ...
ALTERNATIVE: ...
CONCLUSION: ...
OVERALL_STRENGTH: X%"#,
            proposition = proposition,
            advocate_arg = advocate_arg
        )
    }

    /// Rebuttal for advocate in round 2.
    ///
    /// Shorthand for [`DebatePrompts::advocate_rebuttal_for_round`] with a
    /// round number of 2.
    pub fn advocate_rebuttal(proposition: &str, critic_arg: &str, previous_args: &str) -> String {
        Self::advocate_rebuttal_for_round(proposition, critic_arg, previous_args, 2)
    }

    /// Rebuttal for advocate in an arbitrary round.
    ///
    /// `round_number` is the one-based round shown to the model ("round 3"),
    /// so debates longer than two rounds get a prompt that names the right
    /// round. `critic_arg` is the argument being rebutted and `previous_args`
    /// is the earlier exchange supplied as context.
    pub fn advocate_rebuttal_for_round(
        proposition: &str,
        critic_arg: &str,
        previous_args: &str,
        round_number: usize,
    ) -> String {
        format!(
            r#"You are the ADVOCATE in round {round_number} of a structured debate.

PROPOSITION: {proposition}

CRITIC'S ARGUMENT:
{critic_arg}

PREVIOUS ARGUMENTS:
{previous_args}

Rebut the critic's arguments and strengthen your case.

Your rebuttal should:
1. ADDRESS each of the critic's main points
2. STRENGTHEN your original argument
3. PROVIDE new evidence if available
4. CONCEDE points if they are valid (shows intellectual honesty)

Respond in this format:
REBUTTALS:
- RE: "[critic's point]" → [your counter]
NEW_EVIDENCE: ...
CONCESSIONS: ... (if any)
UPDATED_POSITION: ...
STRENGTH: X%"#,
            round_number = round_number,
            proposition = proposition,
            critic_arg = critic_arg,
            previous_args = previous_args
        )
    }

    /// Final synthesis.
    ///
    /// `transcript` is the full debate text to judge. The verdict options
    /// listed in the prompt cover every `VerdictType` variant.
    pub fn synthesizer_verdict(proposition: &str, transcript: &str) -> String {
        format!(
            r#"You are the SYNTHESIZER. Your role is to deliver the final verdict.

PROPOSITION: {proposition}

DEBATE TRANSCRIPT:
{transcript}

Analyze the debate objectively and deliver your verdict.

Consider:
1. Which side presented stronger evidence?
2. Which side had better reasoning?
3. Were key claims refuted or supported?
4. What remains uncertain?

Your verdict should include:
1. VERDICT: Affirmed / Refuted / Balanced / Inconclusive / Partially Affirmed / Requires Further Investigation
2. CONFIDENCE: 0-100%
3. KEY_FACTORS: What determined the outcome
4. WINNING_POINTS: Strongest arguments from the winning side
5. UNRESOLVED: Issues that couldn't be settled
6. RECOMMENDATIONS: What should be done next

Respond in JSON format."#,
            proposition = proposition,
            transcript = transcript
        )
    }

    /// Fact-checker prompt.
    ///
    /// `claims` is the pre-formatted list of claims to verify; it is inserted
    /// as-is under "CLAIMS TO VERIFY:".
    pub fn fact_checker(claims: &str) -> String {
        format!(
            r#"You are the FACT-CHECKER. Verify the factual accuracy of these claims.

CLAIMS TO VERIFY:
{claims}

For each claim, provide:
1. CLAIM: The exact claim
2. VERDICT: True / False / Partially True / Unverifiable
3. CONFIDENCE: 0-100%
4. SOURCE: Evidence for your verdict
5. CORRECTION: If false/partial, what is correct

Respond in JSON format with an array of fact-check results."#,
            claims = claims
        )
    }
}
632
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a claim with the given text, confidence and verification state.
    fn claim(statement: &str, confidence: f32, verified: Option<bool>) -> Claim {
        Claim {
            statement: statement.into(),
            confidence,
            verified,
        }
    }

    /// Builds a round-0 argument without rebuttals or concessions.
    fn opening(
        role: AgentRole,
        content: &str,
        strength: f32,
        claims: Vec<Claim>,
        evidence: Vec<Evidence>,
    ) -> Argument {
        Argument {
            role,
            content: content.into(),
            round: 0,
            claims,
            evidence,
            strength,
            rebuttals: vec![],
            concessions: vec![],
        }
    }

    /// Creates a default-configured arena with `proposition` already set.
    fn arena_for(proposition: &str) -> DebateArena {
        let mut arena = DebateArena::new(DebateConfig::default());
        arena.set_proposition(proposition);
        arena
    }

    // A fresh arena carries the default config and no proposition.
    #[test]
    fn test_debate_arena_creation() {
        let arena = DebateArena::new(DebateConfig::default());
        assert_eq!(arena.config.rounds, 3);
        assert!(arena.proposition.is_none());
    }

    // Adding one argument stores exactly one argument.
    #[test]
    fn test_add_argument() {
        let mut arena = arena_for("AI is beneficial");

        arena.add_argument(opening(
            AgentRole::Advocate,
            "AI improves productivity",
            0.8,
            vec![claim("AI saves time", 0.9, None)],
            vec![],
        ));

        assert_eq!(arena.arguments.len(), 1);
    }

    // A clearly stronger advocate yields an affirming verdict.
    #[test]
    fn test_verdict_synthesis() {
        let mut arena = arena_for("Test proposition");

        arena.add_argument(opening(
            AgentRole::Advocate,
            "Strong argument for",
            0.9,
            vec![claim("Claim 1", 0.9, Some(true))],
            vec![],
        ));
        arena.add_argument(opening(
            AgentRole::Critic,
            "Weak argument against",
            0.4,
            vec![],
            vec![],
        ));

        let verdict = arena.synthesize_verdict();
        assert!(matches!(
            verdict.verdict_type,
            VerdictType::Affirmed | VerdictType::PartiallyAffirmed
        ));
    }

    // Each core role's system prompt carries its signature keyword.
    #[test]
    fn test_agent_role_prompts() {
        let expectations = [
            (AgentRole::Advocate, "FAVOR"),
            (AgentRole::Critic, "FLAWS"),
            (AgentRole::Synthesizer, "weigh"),
        ];
        for (role, keyword) in expectations.iter() {
            assert!(role.system_prompt().contains(keyword));
        }
    }

    // Stats count arguments, claims and evidence.
    #[test]
    fn test_debate_stats() {
        let mut arena = arena_for("Test");

        arena.add_argument(opening(
            AgentRole::Advocate,
            "Arg 1",
            0.85,
            vec![claim("C1", 0.9, None), claim("C2", 0.8, None)],
            vec![Evidence {
                description: "E1".into(),
                source: Some("Source 1".into()),
                credibility: 0.9,
            }],
        ));

        let stats = arena.compute_stats();
        assert_eq!(stats.advocate_arguments, 1);
        assert_eq!(stats.claims_made, 2);
        assert_eq!(stats.evidence_cited, 1);
    }
}